mirror of
https://github.com/bab2min/Kiwi.git
synced 2026-06-17 01:54:27 +00:00
612 lines
18 KiB
C++
612 lines
18 KiB
C++
#include <string>
|
|
#include <regex>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
|
|
#include <kiwi/Utils.h>
|
|
#include "../src/StrUtils.h"
|
|
#include "Evaluator.h"
|
|
#include "toolUtils.h"
|
|
#include "LCS.hpp"
|
|
|
|
using namespace std;
|
|
using namespace kiwi;
|
|
|
|
/**
 * Factory for the concrete evaluators.
 *
 * @param evalType one of "morph", "disamb" or "noun"
 * @return a newly created evaluator matching `evalType`
 * @throws std::runtime_error when `evalType` is none of the values above
 */
unique_ptr<Evaluator> Evaluator::create(const std::string& evalType)
{
	unique_ptr<Evaluator> evaluator;
	if (evalType == "morph")
	{
		evaluator = std::make_unique<MorphEvaluator>();
	}
	else if (evalType == "disamb")
	{
		evaluator = std::make_unique<DisambEvaluator>();
	}
	else if (evalType == "noun")
	{
		evaluator = std::make_unique<NounEvaluator>();
	}
	else
	{
		throw runtime_error{ "Unknown Evaluator Type" };
	}
	return evaluator;
}
|
|
|
|
/**
 * Streams a token as `form[__senseId]/TAG`.
 *
 * The form is converted to UTF-8; the `__senseId` part is emitted only when
 * the sense id is non-zero.
 */
inline ostream& operator<<(ostream& o, const kiwi::TokenInfo& t)
{
	o << utf16To8(t.str);
	if (t.senseId != 0)
	{
		o << "__" << static_cast<int>(t.senseId);
	}
	return o << "/" << kiwi::tagToString(t.tag);
}
|
|
|
|
/**
 * Parses one `form/TAG` item of a test set into a TokenInfo.
 *
 * - Everything after the last '/' is the tag; a trailing `-suffix` on the tag is dropped.
 * - The form ends at the last "__" found before the tag separator (if any),
 *   otherwise at the separator itself; '_' inside the form becomes a space.
 * - For tags starting with 'E', a leading 아/여 is rewritten to 어 and 았/였 to 었.
 * - A leading compatibility jamo ㄴ/ㄹ/ㅁ/ㅂ is rewritten to the matching final-consonant jamo.
 *
 * @return a default-constructed TokenInfo when `str` contains no '/'
 * @throws std::runtime_error when the tag does not map to a valid POSTag
 */
inline TokenInfo parseWordPOS(const u16string& str)
{
	const auto slashPos = str.rfind('/');
	if (slashPos == str.npos) return {};

	const auto sensePos = str.rfind(u"__", slashPos);
	u16string form = str.substr(0, sensePos != str.npos ? sensePos : slashPos);
	form = replace(u16string_view{ form.data(), form.size() }, u"_", u" ");

	if (str[slashPos + 1] == 'E')
	{
		const char16_t head = form[0];
		if (head == u'아' || head == u'여') form[0] = u'어';
		else if (head == u'았' || head == u'였') form[0] = u'었';
	}

	switch (form[0])
	{
	case u'\u3134': form[0] = u'\u11AB'; break; // ㄴ
	case u'\u3139': form[0] = u'\u11AF'; break; // ㄹ
	case u'\u3141': form[0] = u'\u11B7'; break; // ㅁ
	case u'\u3142': form[0] = u'\u11B8'; break; // ㅂ
	default: break;
	}

	u16string tagStr = str.substr(slashPos + 1);
	const auto dashPos = tagStr.find('-');
	if (dashPos != tagStr.npos)
	{
		// erase(pos) removes everything from the dash to the end of the tag
		tagStr.erase(dashPos);
	}

	const POSTag tag = toPOSTag(tagStr);
	if (clearIrregular(tag) >= POSTag::max)
	{
		throw runtime_error{ "Wrong Input '" + utf16To8(str.substr(slashPos + 1)) + "'" };
	}
	return { form, tag, 0, 0 };
}
|
|
|
|
int Evaluator::operator()(const string& modelPath,
|
|
const string& output,
|
|
const vector<string>& input,
|
|
bool normCoda, bool zCoda, bool defaultDict, bool multiDict, ModelType modelType,
|
|
float typoCostWeight, bool bTypo, bool cTypo, bool lTypo,
|
|
Dialect allowedDialect,
|
|
Match oovScoringType,
|
|
float unkFormScoreScale, float unkFormScoreBias,
|
|
bool oldSplitter,
|
|
int repeat)
|
|
{
|
|
try
|
|
{
|
|
if (typoCostWeight > 0 && !bTypo && !cTypo && !lTypo)
|
|
{
|
|
bTypo = true;
|
|
}
|
|
else if (typoCostWeight == 0)
|
|
{
|
|
bTypo = false;
|
|
cTypo = false;
|
|
lTypo = false;
|
|
}
|
|
|
|
tutils::Timer timer;
|
|
auto option = (BuildOption::default_ & ~BuildOption::loadDefaultDict & ~BuildOption::loadMultiDict)
|
|
| (defaultDict ? BuildOption::loadDefaultDict : BuildOption::none)
|
|
| (multiDict ? BuildOption::loadMultiDict : BuildOption::none);
|
|
PreparedTypoTransformer ptt;
|
|
auto typo = getDefaultTypoSet(DefaultTypoSet::withoutTypo);
|
|
|
|
string typoStr = "";
|
|
if (bTypo)
|
|
{
|
|
typo |= getDefaultTypoSet(DefaultTypoSet::basicTypoSet);
|
|
typoStr += "basic";
|
|
}
|
|
|
|
if (cTypo)
|
|
{
|
|
typo |= getDefaultTypoSet(DefaultTypoSet::continualTypoSet);
|
|
if (!typoStr.empty()) typoStr += "+";
|
|
typoStr += "continual";
|
|
}
|
|
|
|
if (lTypo)
|
|
{
|
|
typo |= getDefaultTypoSet(DefaultTypoSet::lengtheningTypoSet);
|
|
if (!typoStr.empty()) typoStr += "+";
|
|
typoStr += "lengthening";
|
|
}
|
|
|
|
if (allowedDialect != Dialect::standard)
|
|
{
|
|
typo |= getDefaultTypoSet(DefaultTypoSet::dialect);
|
|
if (!typoStr.empty()) typoStr += "+";
|
|
typoStr += "dialect";
|
|
}
|
|
Kiwi kw;
|
|
|
|
if (oldSplitter)
|
|
{
|
|
kw = KiwiBuilder{ modelPath, 1, option, modelType, allowedDialect }.build(
|
|
typo
|
|
);
|
|
}
|
|
else
|
|
{
|
|
kw = KiwiBuilder{ modelPath, 1, option, modelType, allowedDialect }.build();
|
|
ptt = typo.prepare(true);
|
|
}
|
|
|
|
if (typoCostWeight > 0)
|
|
{
|
|
auto config = kw.getGlobalConfig();
|
|
config.typoCostWeight = typoCostWeight;
|
|
kw.setGlobalConfig(config);
|
|
}
|
|
|
|
if (isfinite(unkFormScoreScale))
|
|
{
|
|
auto config = kw.getGlobalConfig();
|
|
config.oovRuleScale = unkFormScoreScale;
|
|
kw.setGlobalConfig(config);
|
|
}
|
|
if (isfinite(unkFormScoreBias))
|
|
{
|
|
auto config = kw.getGlobalConfig();
|
|
if (oovScoringType == Match::oovRuleOnly)
|
|
{
|
|
config.oovRuleBias = unkFormScoreBias;
|
|
}
|
|
else
|
|
{
|
|
config.oovChrBias = unkFormScoreBias;
|
|
}
|
|
kw.setGlobalConfig(config);
|
|
}
|
|
|
|
cout << "Loading Time : " << timer.getElapsed() << " ms" << endl;
|
|
cout << "ArchType : " << archToStr(kw.archType()) << endl;
|
|
cout << "Model Type : " << modelTypeToStr(kw.modelType()) << endl;
|
|
if (kw.getLangModel())
|
|
{
|
|
cout << "LM Size : " << (kw.getLangModel()->getMemorySize() / 1024. / 1024.) << " MB" << endl;
|
|
}
|
|
cout << "OOV Scoring : " << tutils::oovScoringTypeToStr(oovScoringType) << endl;
|
|
cout << "Typo Correction: " << (typoStr.empty() ? "none" : typoStr) << endl;
|
|
cout << "Mem Usage : " << (tutils::getCurrentPhysicalMemoryUsage() / 1024.) << " MB\n" << endl;
|
|
|
|
double avgMicro = 0, avgMacro = 0;
|
|
double cnt = 0;
|
|
AnalyzeOption analyzeOption;
|
|
analyzeOption.match = (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda);
|
|
analyzeOption.match |= oovScoringType;
|
|
analyzeOption.allowedDialects = allowedDialect;
|
|
if (oldSplitter)
|
|
{
|
|
analyzeOption.match |= Match::useOldSplitter;
|
|
}
|
|
else
|
|
{
|
|
analyzeOption.typoTransformer = &ptt;
|
|
}
|
|
|
|
for (auto& tf : input)
|
|
{
|
|
cout << "Test file: " << tf << endl;
|
|
try
|
|
{
|
|
auto result = eval(output, tf, kw, analyzeOption, repeat);
|
|
avgMicro += result.first;
|
|
avgMacro += result.second;
|
|
++cnt;
|
|
cout << "================" << endl;
|
|
}
|
|
catch (const std::exception& e)
|
|
{
|
|
cerr << e.what() << endl;
|
|
}
|
|
}
|
|
|
|
cout << endl << "================" << endl;
|
|
cout << "Avg Score" << endl;
|
|
cout << avgMicro / cnt << ", " << avgMacro / cnt << endl;
|
|
cout << "================" << endl;
|
|
return 0;
|
|
}
|
|
catch (const exception& e)
|
|
{
|
|
cerr << e.what() << endl;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/**
 * Reads a morpheme test set: one sample per line, `text<TAB>tok1 tok2 ...`,
 * where each token is parsed with parseWordPOS().
 *
 * Lines with fewer than two tab-separated fields (including blank lines) are skipped.
 *
 * @throws std::ios_base::failure when the file cannot be opened
 * @throws std::runtime_error     propagated from parseWordPOS() on an invalid tag
 */
auto MorphEvaluator::loadTestset(const string& testSetFile) const -> vector<TestResult>
{
	vector<TestResult> ret;
	ifstream f{ testSetFile };
	if (!f) throw std::ios_base::failure{ "Cannot open '" + testSetFile + "'" };
	string line;
	while (getline(f, line))
	{
		// Strip trailing CR/LF. The emptiness check matters: back() on an empty
		// string (blank line, or a line that was only "\r") is undefined behaviour.
		while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) line.pop_back();
		auto wstr = utf8To16(line);
		auto fd = split(wstr, u'\t');
		if (fd.size() < 2) continue;

		TestResult tr;
		tr.q = u16string{ fd[0] };
		for (auto s : split(fd[1], u' '))
		{
			tr.a.emplace_back(parseWordPOS(u16string(s)));
		}
		ret.emplace_back(std::move(tr));
	}
	return ret;
}
|
|
|
|
/**
 * Scores predicted token sequences (`r`) against the answers (`a`).
 *
 * For every sample whose prediction differs from the answer, an LCS diff is
 * computed with a tolerant token comparison; the sample's score is
 * common / diff-length, its unmatched tokens are appended to `dr` / `da`, and a copy
 * of the sample is appended to `errors`. Exact matches score 1.
 *
 * @param preds   samples with both `a` and `r` filled; `score`, `dr`, `da` are updated in place
 * @param errors  cleared, then filled with the mismatching samples
 * @return micro score (matched tokens / all diff entries), macro score (mean
 *         per-sample score) and the number of samples; both scores are 0 when
 *         there is nothing to score
 */
auto MorphEvaluator::computeScore(vector<TestResult>& preds, vector<TestResult>& errors) const -> Score
{
	// Surface forms accepted as equal (in either direction) once the tags agree.
	static const pair<const char16_t*, const char16_t*> interchangeable[] = {
		{ u"은", u"ᆫ" },
		{ u"을", u"ᆯ" },
		{ u"음", u"ᆷ" },
		{ u"그것", u"그거" },
		{ u"것", u"거" },
	};

	errors.clear();

	size_t totalCount = 0, microCorrect = 0, microCount = 0;
	double totalScore = 0;

	for (auto& tr : preds)
	{
		if (tr.a != tr.r)
		{
			auto diff = lcs::getDiff(tr.r.begin(), tr.r.end(), tr.a.begin(), tr.a.end(), [](const TokenInfo& a, const TokenInfo& b)
			{
				if (clearIrregular(a.tag) != clearIrregular(b.tag)) return false;
				// Tokens tagged jko match regardless of their surface form.
				if (a.tag == POSTag::jko) return true;
				for (const auto& forms : interchangeable)
				{
					if (a.str == forms.first && b.str == forms.second) return true;
					if (b.str == forms.first && a.str == forms.second) return true;
				}
				return a.str == b.str;
			});
			size_t common = 0;
			for (auto&& d : diff)
			{
				if (d.first < 0) tr.dr.emplace_back(d.second);
				else if (d.first > 0) tr.da.emplace_back(d.second);
				else common++;
			}
			tr.score = diff.empty() ? 0 : common / (double)diff.size();
			totalScore += tr.score;
			microCorrect += common;
			microCount += diff.size();
			errors.emplace_back(tr);
		}
		else
		{
			totalScore += 1;
			microCorrect += tr.r.size();
			microCount += tr.r.size();
		}
		totalCount++;
	}

	Score ret;
	// Guard the denominators: an empty test set would otherwise yield NaN (0/0).
	ret.micro = (microCount == 0) ? 0 : microCorrect / (double)microCount;
	ret.macro = (totalCount == 0) ? 0 : totalScore / totalCount;
	ret.totalCount = totalCount;
	return ret;
}
|
|
|
|
/**
 * Writes one error record: the input text and score on the first line, the
 * `da` tokens on the second, the `dr` tokens on the third, followed by an
 * empty line. Tokens are tab-terminated.
 */
void MorphEvaluator::TestResult::writeResult(ostream& out) const
{
	const auto writeRow = [&out](const auto& tokens)
	{
		for (const auto& token : tokens)
		{
			out << token << '\t';
		}
		out << endl;
	};

	out << utf16To8(q) << '\t' << score << endl;
	writeRow(da);
	writeRow(dr);
	out << endl;
}
|
|
|
|
/**
 * Evaluates one morpheme test file.
 *
 * The whole test set is analysed `repeat` times (the reported time is the
 * mean per pass), the last analysis is scored, and a summary is printed to
 * stdout. When `output` is non-empty, the summary and every mismatching
 * sample are also written to `<output>/<basename of file>`.
 *
 * @return {micro score, macro score}
 * @throws whatever loadTestset() throws (unreadable file, malformed tag)
 */
pair<double, double> MorphEvaluator::eval(const string& output, const string& file, Kiwi& kiwi, AnalyzeOption option, int repeat)
{
	const size_t topN = 1;
	vector<TestResult> testsets = loadTestset(file), errors;
	tutils::Timer total;
	for (int i = 0; i < repeat; ++i)
	{
		for (auto& tr : testsets)
		{
			auto cands = kiwi.analyze(tr.q, topN, option);
			tr.r = cands[0].first;
		}
	}
	double tm = total.getElapsed() / repeat;
	auto score = computeScore(testsets, errors);

	cout << score.micro << ", " << score.macro << endl;
	cout << "Total (" << score.totalCount << " lines) Time : " << tm << " ms" << endl;
	cout << "Time per Line : " << tm / score.totalCount << " ms" << endl;

	if (!output.empty())
	{
		// Keep only the file name so the report lands directly inside `output`.
		const size_t last_slash_idx = file.find_last_of("\\/");
		string name;
		if (last_slash_idx != file.npos) name = file.substr(last_slash_idx + 1);
		else name = file;

		const string outPath = output + "/" + name;
		ofstream out{ outPath };
		if (!out)
		{
			// Do not fail the evaluation over the report; just make the problem visible.
			cerr << "Cannot open '" << outPath << "' for writing" << endl;
		}
		else
		{
			out << score.micro << ", " << score.macro << endl;
			out << "Total (" << score.totalCount << ") Time : " << tm << " ms" << endl;
			out << "Time per Unit : " << tm / score.totalCount << " ms" << endl;
			// writeResult() is const, so iterate by reference instead of copying each record.
			for (const auto& t : errors)
			{
				t.writeResult(out);
			}
		}
	}
	return make_pair(score.micro, score.macro);
}
|
|
|
|
/**
 * Reads a disambiguation test set: one sample per line,
 * `form/TAG<TAB>text`. The first field is parsed with parseWordPOS() into the
 * target token; the second is the text to analyse.
 *
 * Lines with fewer than two tab-separated fields (including blank lines) are skipped.
 *
 * @throws std::ios_base::failure when the file cannot be opened
 * @throws std::runtime_error     propagated from parseWordPOS() on an invalid tag
 */
auto DisambEvaluator::loadTestset(const string& testSetFile) const -> vector<TestResult>
{
	vector<TestResult> ret;
	ifstream f{ testSetFile };
	if (!f) throw std::ios_base::failure{ "Cannot open '" + testSetFile + "'" };
	string line;
	while (getline(f, line))
	{
		// Strip trailing CR/LF. The emptiness check matters: back() on an empty
		// string (blank line, or a line that was only "\r") is undefined behaviour.
		while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) line.pop_back();
		auto wstr = utf8To16(line);
		auto fd = split(wstr, u'\t');
		if (fd.size() < 2) continue;

		TestResult tr;
		tr.target = parseWordPOS(u16string{ fd[0] });
		tr.text = u16string{ fd[1] };
		ret.emplace_back(std::move(tr));
	}
	return ret;
}
|
|
|
|
/**
 * Computes accuracy for the disambiguation task.
 *
 * A sample counts as correct when any token of its analysis result has the
 * same form as the target and the same tag after clearIrregular().
 *
 * @param preds   samples with `target` and `result` filled
 * @param errors  cleared, then filled with copies of the incorrect samples
 * @return accuracy and sample count; with no samples, `acc` is left at its
 *         initial value instead of being divided by zero
 */
auto DisambEvaluator::computeScore(vector<TestResult>& preds, vector<TestResult>& errors) const -> Score
{
	errors.clear();
	Score score;
	for (auto& tr : preds)
	{
		bool correct = false;
		for (auto& token : tr.result.first)
		{
			if (token.str == tr.target.str &&
				clearIrregular(token.tag) == clearIrregular(tr.target.tag))
			{
				correct = true;
				break;
			}
		}
		if (correct) score.acc += 1;
		else errors.emplace_back(tr);
		score.totalCount++;
	}
	// Avoid 0/0 (NaN) for an empty test set.
	if (score.totalCount > 0) score.acc /= score.totalCount;
	return score;
}
|
|
|
|
/**
 * Writes one error record: target token, text and score on the first line,
 * the tab-terminated tokens of the analysis result on the second, followed by
 * an empty line.
 */
void DisambEvaluator::TestResult::writeResult(ostream& out) const
{
	out << target << '\t';
	out << utf16To8(text) << '\t';
	out << score << endl;

	for (const auto& token : result.first)
	{
		out << token << '\t';
	}
	out << endl << endl;
}
|
|
|
|
/**
 * Evaluates one disambiguation test file.
 *
 * The whole test set is analysed `repeat` times (the reported time is the
 * mean per pass), the last analysis is scored, and a summary is printed to
 * stdout. When `output` is non-empty, the summary and every incorrect sample
 * are also written to `<output>/<basename of file>`.
 *
 * @return {accuracy, accuracy}
 * @throws whatever loadTestset() throws (unreadable file, malformed tag)
 */
pair<double, double> DisambEvaluator::eval(const string& output, const string& file, Kiwi& kiwi, AnalyzeOption option, int repeat)
{
	const size_t topN = 1;
	vector<TestResult> testsets = loadTestset(file), errors;
	tutils::Timer total;
	for (int i = 0; i < repeat; ++i)
	{
		for (auto& tr : testsets)
		{
			auto cands = kiwi.analyze(tr.text, topN, option);
			tr.result = cands[0];
		}
	}
	double tm = total.getElapsed() / repeat;
	auto score = computeScore(testsets, errors);

	cout << score.acc << endl;
	cout << "Total (" << score.totalCount << " lines) Time : " << tm << " ms" << endl;
	cout << "Time per Line : " << tm / score.totalCount << " ms" << endl;

	if (!output.empty())
	{
		// Keep only the file name so the report lands directly inside `output`.
		const size_t last_slash_idx = file.find_last_of("\\/");
		string name;
		if (last_slash_idx != file.npos) name = file.substr(last_slash_idx + 1);
		else name = file;

		const string outPath = output + "/" + name;
		ofstream out{ outPath };
		if (!out)
		{
			// Do not fail the evaluation over the report; just make the problem visible.
			cerr << "Cannot open '" << outPath << "' for writing" << endl;
		}
		else
		{
			out << score.acc << endl;
			out << "Total (" << score.totalCount << ") Time : " << tm << " ms" << endl;
			out << "Time per Unit : " << tm / score.totalCount << " ms" << endl;
			// writeResult() is const, so iterate by reference instead of copying each record.
			for (const auto& t : errors)
			{
				t.writeResult(out);
			}
		}
	}
	return make_pair(score.acc, score.acc);
}
|
|
|
|
/**
 * Reads a noun-extraction test set. Each line is running text in which gold
 * nouns are wrapped as `<n>noun</n>` or `<n e="label">noun</n>`.
 *
 * For every line the markup is removed to obtain the text to analyse, and
 * each wrapped noun is counted in `golds` together with its label (empty when
 * the `e` attribute is absent; a later occurrence overwrites the label).
 *
 * @throws std::ios_base::failure when the file cannot be opened
 */
auto NounEvaluator::loadTestset(const string& testSetFile) const -> vector<TestResult>
{
	vector<TestResult> ret;
	ifstream f{ testSetFile };
	if (!f) throw std::ios_base::failure{ "Cannot open '" + testSetFile + "'" };
	string line;

	regex nounTagPattern{ "<n(?:\\s+e=\"([^\"]+)\")?>(.+?)</n>" };

	while (getline(f, line))
	{
		// Strip trailing CR/LF. The emptiness check matters: back() on an empty
		// string (blank line, or a line that was only "\r") is undefined behaviour.
		while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) line.pop_back();
		TestResult tr;
		smatch matches;
		auto searchStart = line.cbegin();
		string inputText;
		while (regex_search(searchStart, line.cend(), matches, nounTagPattern))
		{
			// Plain text preceding the tag.
			inputText.append(searchStart, matches[0].first);
			const u16string nounStr = utf8To16(matches[2].str());
			const string labelStr = matches[1].str();
			// Single lookup for both the occurrence count and the label.
			auto& gold = tr.golds[nounStr];
			++gold.first;
			gold.second = labelStr;
			// The noun itself, without the surrounding markup.
			inputText.append(matches[2].first, matches[2].second);
			searchStart = matches[0].second;
		}
		inputText.append(searchStart, line.cend());
		tr.text = utf8To16(inputText);
		ret.emplace_back(std::move(tr));
	}
	return ret;
}
|
|
|
|
/**
 * Scores noun extraction.
 *
 * Predicted nouns are the result tokens tagged nng, nnp or nnb. For every
 * gold noun, the number of matches is min(predicted count, gold count);
 * matches are accumulated per sample (`correct`, `correctChr`,
 * `labeledCorrect`, `numPreds`, `numPredsChr`) and over the whole set.
 * Character-level figures weight each noun by its UTF-16 length.
 *
 * @param preds   samples with `golds` and `result` filled; per-sample counters are updated in place
 * @param errors  cleared, then filled with samples that missed at least one labeled gold noun
 * @return precision / recall / F1 at morpheme and character level, labeled
 *         recall and the number of samples; ratios with a zero denominator are 0
 */
auto NounEvaluator::computeScore(vector<TestResult>& preds, vector<TestResult>& errors) const -> Score
{
	// F1 = 2pr / (p + r), defined as 0 when both inputs are 0.
	// (Clamping the denominator to 1 would return 2pr whenever p + r < 1.)
	const auto harmonicMean = [](double p, double r)
	{
		return (p + r > 0) ? 2 * p * r / (p + r) : 0.;
	};

	errors.clear();
	size_t totalCorrect = 0, totalLabeledCorrect = 0, totalGolds = 0, totalLabeledGolds = 0, totalPreds = 0;
	size_t totalCorrectChr = 0, totalPredsChr = 0, totalGoldsChr = 0;
	for (auto& tr : preds)
	{
		std::unordered_map<u16string, size_t> predCnt;
		for (auto& token : tr.result.first)
		{
			if (token.tag == POSTag::nng || token.tag == POSTag::nnp || token.tag == POSTag::nnb)
			{
				++predCnt[token.str];
				tr.numPredsChr += token.str.size();
				++tr.numPreds;
			}
		}
		size_t numCurrentGoldLabels = 0;
		for (auto& [g, info] : tr.golds)
		{
			// Bind by reference: copying would duplicate the label string per gold noun.
			const auto& [cnt, label] = info;
			auto it = predCnt.find(g);
			if (it != predCnt.end())
			{
				size_t matchCnt = min(it->second, cnt);
				tr.correct += matchCnt;
				tr.correctChr += g.size() * matchCnt;
				if (!label.empty())
				{
					tr.labeledCorrect += matchCnt;
				}
			}
			if (!label.empty())
			{
				totalLabeledGolds += cnt;
				numCurrentGoldLabels += cnt;
			}
			totalGolds += cnt;
			totalGoldsChr += g.size() * cnt;
		}
		totalPreds += tr.numPreds;
		totalCorrect += tr.correct;
		totalLabeledCorrect += tr.labeledCorrect;
		totalPredsChr += tr.numPredsChr;
		totalCorrectChr += tr.correctChr;
		if (tr.labeledCorrect < numCurrentGoldLabels) errors.emplace_back(tr);
	}
	Score score;
	score.precision = (totalPreds == 0) ? 0 : (double)totalCorrect / totalPreds;
	score.recall = (totalGolds == 0) ? 0 : (double)totalCorrect / totalGolds;
	score.labeledRecall = (totalLabeledGolds == 0) ? 0 : (double)totalLabeledCorrect / totalLabeledGolds;
	score.f1 = harmonicMean(score.precision, score.recall);

	score.precisionChr = (totalPredsChr == 0) ? 0 : (double)totalCorrectChr / totalPredsChr;
	score.recallChr = (totalGoldsChr == 0) ? 0 : (double)totalCorrectChr / totalGoldsChr;
	score.f1Chr = harmonicMean(score.precisionChr, score.recallChr);
	score.totalCount = preds.size();
	return score;
}
|
|
|
|
/**
 * Writes one error record:
 *   line 1: text, labeled recall, precision, recall, F1 (tab separated)
 *   line 2: "Golds:" followed by every gold noun (with "/label" when labeled)
 *   line 3: the tokens of the analysis result
 * followed by an empty line. The ratios are recomputed from the per-sample
 * counters and are 0 when their denominator is 0.
 */
void NounEvaluator::TestResult::writeResult(ostream& out) const
{
	size_t goldCount = 0;
	size_t labeledGoldCount = 0;
	for (const auto& entry : golds)
	{
		const auto& info = entry.second;
		goldCount += info.first;
		if (!info.second.empty()) labeledGoldCount += info.first;
	}

	float precision = 0;
	if (numPreds != 0) precision = static_cast<double>(correct) / numPreds;
	float recall = 0;
	if (goldCount != 0) recall = static_cast<double>(correct) / goldCount;
	float labeledRecall = 0;
	if (labeledGoldCount != 0) labeledRecall = static_cast<double>(labeledCorrect) / labeledGoldCount;
	// The tiny lower bound keeps the division defined when precision and recall are both 0.
	const float f1 = 2 * precision * recall / max(precision + recall, 1e-10f);

	out << utf16To8(text) << '\t' << labeledRecall << '\t' << precision << '\t' << recall << '\t' << f1 << endl;

	out << "Golds:" << '\t';
	for (const auto& entry : golds)
	{
		const string& label = entry.second.second;
		out << utf16To8(entry.first) << (label.empty() ? "" : ("/" + label)) << '\t';
	}
	out << endl;

	for (const auto& token : result.first)
	{
		out << token << '\t';
	}
	out << endl << endl;
}
|
|
|
|
std::pair<double, double> NounEvaluator::eval(const std::string& output, const std::string& file, kiwi::Kiwi& kiwi, kiwi::AnalyzeOption option, int repeat)
|
|
{
|
|
vector<TestResult> testsets = loadTestset(file), errors;
|
|
tutils::Timer total;
|
|
for (int i = 0; i < repeat; ++i)
|
|
{
|
|
for (auto& tr : testsets)
|
|
{
|
|
auto cands = kiwi.analyze(tr.text, option);
|
|
tr.result = cands;
|
|
}
|
|
}
|
|
double tm = total.getElapsed() / repeat;
|
|
auto score = computeScore(testsets, errors);
|
|
|
|
cout << "Labeled Recall: " << score.labeledRecall << endl;
|
|
cout << "(Morph Level) Precision: " << score.precision << ", Recall: " << score.recall << ", F1: " << score.f1 << endl;
|
|
cout << "(Chr Level) Precision: " << score.precisionChr << ", Recall: " << score.recallChr << ", F1: " << score.f1Chr << endl;
|
|
cout << "Total (" << score.totalCount << " lines) Time : " << tm << " ms" << endl;
|
|
cout << "Time per Line : " << tm / score.totalCount << " ms" << endl;
|
|
|
|
if (!output.empty())
|
|
{
|
|
const size_t last_slash_idx = file.find_last_of("\\/");
|
|
string name;
|
|
if (last_slash_idx != file.npos) name = file.substr(last_slash_idx + 1);
|
|
else name = file;
|
|
|
|
ofstream out{ output + "/" + name };
|
|
out << "Labeled Recall: " << score.labeledRecall << endl;
|
|
out << "(Morph Level) Precision: " << score.precision << ", Recall: " << score.recall << ", F1: " << score.f1 << endl;
|
|
out << "(Chr Level) Precision: " << score.precisionChr << ", Recall: " << score.recallChr << ", F1: " << score.f1Chr << endl;
|
|
out << "Total (" << score.totalCount << ") Time : " << tm << " ms" << endl;
|
|
out << "Time per Unit : " << tm / score.totalCount << " ms" << endl;
|
|
for (auto t : errors)
|
|
{
|
|
t.writeResult(out);
|
|
}
|
|
}
|
|
return make_pair(score.labeledRecall, score.f1Chr);
|
|
}
|