mirror of
https://github.com/bab2min/Kiwi.git
synced 2026-06-17 01:54:27 +00:00
test case for analyzing with pretokenized spans
This commit is contained in:
parent
1fc785c315
commit
2cdaa29286
1 changed file with 42 additions and 0 deletions
|
|
@@ -170,6 +170,48 @@ TEST(KiwiCpp, EmptyToken)
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that Kiwi::analyze() honours caller-supplied pretokenized spans.
//
// Each PretokenizedSpan is written as { begin, end, tokens }. Judging by the
// expectations below, begin/end are half-open character offsets into `str`
// (e.g. [3, 9) covers "패트와 매트"), and the per-token offsets inside
// `tokens` are relative to the start of the enclosing span.
TEST(KiwiCpp, Pretokenized)
{
	Kiwi& kiwi = reuseKiwiInstance();
	auto str = u"드디어패트와 매트가 2017년에 국내 개봉했다. 패트와매트는 2016년...";

	std::vector<TokenInfo> res;

	// Case 1: spans without explicit token info. Each span is expected to
	// come back as a single token whose surface form is the covered text.
	{
		std::vector<PretokenizedSpan> pretokenized = {
			{ 3, 9, {} },    // "패트와 매트"
			{ 11, 16, {} },  // "2017년"
			{ 34, 39, {} },  // "2016년"
		};

		res = kiwi.analyze(str, Match::allWithNormalizing, nullptr, &pretokenized).first;
		// The highest index read below is 13. Abort this test cleanly instead of
		// indexing past the end of `res` if the analyzer returns fewer tokens.
		ASSERT_GT(res.size(), 13u);
		EXPECT_EQ(res[1].str, u"패트와 매트");
		EXPECT_EQ(res[3].str, u"2017년");
		EXPECT_EQ(res[13].str, u"2016년");
	}

	// Case 2: spans carrying explicit token info (form, begin, end, tag).
	// NOTE(review): the spans are listed out of positional order (27, 30, 21),
	// presumably on purpose to exercise unsorted input -- confirm with the author.
	{
		std::vector<PretokenizedSpan> pretokenized = {
			// "패트" at [27, 29) is replaced by the form "페트" tagged NNB.
			{ 27, 29, { { u"페트", 0, 2, POSTag::nnb } } },
			// "매트" at [30, 32) has no token info; it is expected to be tagged NNG.
			{ 30, 32, {} },
			// "개봉했" at [21, 24) is split into two overlapping morphemes:
			// "개봉하" over span-relative [0, 3) and "었" over [2, 3).
			{ 21, 24, { { u"개봉하", 0, 3, POSTag::vv }, { u"었", 2, 3, POSTag::ep } }},
		};

		res = kiwi.analyze(str, Match::allWithNormalizing, nullptr, &pretokenized).first;
		// Same guard as above: indices up to 13 are dereferenced below.
		ASSERT_GT(res.size(), 13u);
		EXPECT_EQ(res[7].str, u"개봉하");
		EXPECT_EQ(res[7].tag, POSTag::vv);
		EXPECT_EQ(res[7].position, 21);
		EXPECT_EQ(res[7].length, 3);
		EXPECT_EQ(res[8].str, u"었");
		EXPECT_EQ(res[8].tag, POSTag::ep);
		EXPECT_EQ(res[8].position, 23);
		EXPECT_EQ(res[8].length, 1);
		EXPECT_EQ(res[11].str, u"페트");
		EXPECT_EQ(res[11].tag, POSTag::nnb);
		EXPECT_EQ(res[13].str, u"매트");
		EXPECT_EQ(res[13].tag, POSTag::nng);
	}
}
|
||||
|
||||
TEST(KiwiCpp, HSDataset)
|
||||
{
|
||||
KiwiBuilder kw{ MODEL_PATH, 0, BuildOption::default_, };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue