Kiwi/test/test_combiner.cpp
2022-08-31 02:24:39 +09:00

211 lines
6.8 KiB
C++

#include <fstream>
#include <stdexcept>

#include "gtest/gtest.h"
#include "common.h"
#include "../src/Combiner.h"
using namespace kiwi;
/**
 * Returns the compiled combining rule shared by the tests in this file.
 *
 * The rule lives in a function-local static. Whenever `isReady()` reports
 * false, the rule text is read from `MODEL_PATH "/combiningRule.txt"`,
 * loaded into a `cmb::RuleSet` and compiled into the static.
 *
 * @return a mutable reference to the shared rule; anything done through it
 *         (for example `addAllomorph`) is seen by every later caller.
 * @throws std::runtime_error if the rule file cannot be opened.
 */
cmb::CompiledRule& getCompiledRule()
{
    static cmb::CompiledRule rule;
    if (!rule.isReady())
    {
        std::ifstream ifs{ MODEL_PATH "/combiningRule.txt" };
        // Fail loudly here: an unopened stream would otherwise be passed on
        // to loadRules and the tests would only fail later, far from the cause.
        if (!ifs.is_open())
        {
            throw std::runtime_error{ "cannot open combining rule file: " MODEL_PATH "/combiningRule.txt" };
        }

        cmb::RuleSet crs;
        crs.loadRules(ifs);
        rule = crs.compile();
    }
    return rule;
}
// Checks element [0] of the result of CompiledRule::combine for a series of
// (left form, left tag, right form, right tag[, CondVowel]) inputs.
//
// NOTE(review): several form literals below are empty strings even where the
// expected output is non-empty (e.g. the case expecting u"도와"). This looks
// like characters lost to an encoding problem -- confirm against the
// repository copy before relying on these cases.
TEST(KiwiCppCombiner, Combine)
{
    cmb::CompiledRule& rule = getCompiledRule();

    // vcp / p on the left, called with CondVowel::vowel.
    EXPECT_EQ(
        rule.combine(u"", POSTag::vcp, u"", POSTag::ec, CondVowel::vowel)[0],
        u"");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vcp, u"", POSTag::ec, CondVowel::vowel)[0],
        u"");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vcp, u"ᆫ지도", POSTag::ec, CondVowel::vowel)[0],
        u"ᆫ지도");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vcp, u"ᆫ가", POSTag::ec, CondVowel::vowel)[0],
        u"ᆫ가");
    EXPECT_EQ(
        rule.combine(u"", POSTag::p, u"", POSTag::etm, CondVowel::vowel)[0],
        u"");

    // ep on the right.
    EXPECT_EQ(
        rule.combine(u"", POSTag::vv, u"", POSTag::ep)[0],
        u"");
    EXPECT_EQ(
        rule.combine(u"", POSTag::ep, u"", POSTag::ep)[0],
        u"");

    // vv / vvi on the left, ec on the right. The same left form is expected
    // to combine differently under vv and vvi.
    EXPECT_EQ(
        rule.combine(u"이르", POSTag::vv, u"", POSTag::ec)[0],
        u"일러");
    EXPECT_EQ(
        rule.combine(u"이르", POSTag::vvi, u"", POSTag::ec)[0],
        u"이르러");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vv, u"", POSTag::ec)[0],
        u"");
    EXPECT_EQ(
        rule.combine(u"따르", POSTag::vv, u"", POSTag::ec)[0],
        u"따라");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vv, u"", POSTag::ec)[0],
        u"도와");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vv, u"도록", POSTag::ec)[0],
        u"토록");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vv, u"", POSTag::ec)[0],
        u"");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vvi, u"", POSTag::ec)[0],
        u"물어");
    EXPECT_EQ(
        rule.combine(u"", POSTag::vv, u"", POSTag::ec)[0],
        u"묻어");

    // p on the left, ec on the right.
    EXPECT_EQ(
        rule.combine(u"타이르", POSTag::p, u"", POSTag::ec)[0],
        u"타일러");
    EXPECT_EQ(
        rule.combine(u"가르", POSTag::p, u"", POSTag::ec)[0],
        u"갈라");

    // Remaining tag pairs: np + jks and ep + ef.
    EXPECT_EQ(
        rule.combine(u"", POSTag::np, u"", POSTag::jks)[0],
        u"내가");
    EXPECT_EQ(
        rule.combine(u"", POSTag::ep, u"어용", POSTag::ef)[0],
        u"셔용");
}
// Feeds sequences of (form, tag) morphemes into a joiner obtained from the
// compiled rule and checks the UTF-16 text returned by getU16().
// Every scenario runs in its own scope with a freshly created joiner.
//
// NOTE(review): many form literals below are empty strings although the
// expected outputs are not -- this looks like characters lost to an encoding
// problem; confirm against the repository copy.
TEST(KiwiCppCombiner, Joiner)
{
    cmb::CompiledRule& compiled = getCompiledRule();

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"했다");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"하셨다");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"하다");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"어서", POSTag::ec);
        EXPECT_EQ(joiner.getU16(), u"도와서");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"아름답", POSTag::vai);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"아름다워");
    }

    // Whole sentence: the expected text contains spaces between words.
    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"다시", POSTag::mag);
        joiner.add(u"시동", POSTag::nng);
        joiner.add(u"", POSTag::jko);
        joiner.add(u"잽싸", POSTag::va);
        joiner.add(u"", POSTag::ec);
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ef);
        joiner.add(u".", POSTag::sf);
        EXPECT_EQ(joiner.getU16(), u"다시 시동을 잽싸게 걸었다.");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::va);
        joiner.add(u"", POSTag::etm);
        joiner.add(u"소리", POSTag::nng);
        joiner.add(u"", POSTag::vcp);
        joiner.add(u"라도", POSTag::ec);
        joiner.add(u"", POSTag::vvi);
        joiner.add(u"", POSTag::ef);
        joiner.add(u"!", POSTag::sf);
        EXPECT_EQ(joiner.getU16(), u"작은 소리라도 들어!");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::np);
        joiner.add(u"", POSTag::jks);
        joiner.add(u"", POSTag::vvi);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"내가 물었다");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ec);
        joiner.add(u"", POSTag::vx);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"돼지다");
    }

    // The same ending u"으니" after a vai stem and after a va stem.
    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"하얗", POSTag::vai);
        joiner.add(u"으니", POSTag::ec);
        EXPECT_EQ(joiner.getU16(), u"하야니");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::va);
        joiner.add(u"으니", POSTag::ec);
        EXPECT_EQ(joiner.getU16(), u"좋으니");
    }
}
// Registers allomorph groups on the compiled rule via addAllomorph and then
// checks the text produced by joiners created from that rule.
//
// The rule is the reference returned by getCompiledRule(), which is backed by
// a function-local static, so the groups registered here stay on that shared
// object after this test finishes.
//
// NOTE(review): many form literals below are empty strings although the
// expected outputs are not -- this looks like characters lost to an encoding
// problem; confirm against the repository copy.
TEST(KiwiCppCombiner, Allomorph)
{
    using Tuple = std::tuple<nonstd::u16string_view, CondVowel, uint8_t>;

    cmb::CompiledRule& compiled = getCompiledRule();

    compiled.addAllomorph({
        Tuple{ nonstd::u16string_view{u""}, CondVowel::vowel, static_cast<uint8_t>(0) },
        Tuple{ nonstd::u16string_view{u""}, CondVowel::non_vowel, static_cast<uint8_t>(0) }
    }, POSTag::jko);

    compiled.addAllomorph({
        Tuple{ nonstd::u16string_view{u""}, CondVowel::vowel, static_cast<uint8_t>(0) },
        Tuple{ nonstd::u16string_view{u""}, CondVowel::non_vowel, static_cast<uint8_t>(0) }
    }, POSTag::jks);

    compiled.addAllomorph({
        Tuple{ nonstd::u16string_view{u""}, CondVowel::vocalic, static_cast<uint8_t>(0) },
        Tuple{ nonstd::u16string_view{u"으로"}, CondVowel::non_vowel, static_cast<uint8_t>(0) }
    }, POSTag::jkb);

    // jko after two different nouns.
    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"시동", POSTag::nng);
        joiner.add(u"", POSTag::jko);
        EXPECT_EQ(joiner.getU16(), u"시동을");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"시도", POSTag::nng);
        joiner.add(u"", POSTag::jko);
        EXPECT_EQ(joiner.getU16(), u"시도를");
    }

    // jks after two different nouns.
    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"바다", POSTag::nng);
        joiner.add(u"", POSTag::jks);
        EXPECT_EQ(joiner.getU16(), u"바다가");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"바닥", POSTag::nng);
        joiner.add(u"", POSTag::jks);
        EXPECT_EQ(joiner.getU16(), u"바닥이");
    }

    // jkb: the same input form u"으로" is expected to come out differently.
    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::nng);
        joiner.add(u"으로", POSTag::jkb);
        EXPECT_EQ(joiner.getU16(), u"불로");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::nng);
        joiner.add(u"으로", POSTag::jkb);
        EXPECT_EQ(joiner.getU16(), u"북으로");
    }

    // A further group, registered for ec only after the particle cases above.
    compiled.addAllomorph({
        Tuple{ nonstd::u16string_view{u""}, CondVowel::vocalic, static_cast<uint8_t>(0) },
        Tuple{ nonstd::u16string_view{u"으면"}, CondVowel::non_vowel, static_cast<uint8_t>(0) }
    }, POSTag::ec);

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ec);
        EXPECT_EQ(joiner.getU16(), u"갈면");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ec);
        EXPECT_EQ(joiner.getU16(), u"가셨으면");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ep);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"했다");
    }

    {
        auto joiner = compiled.newJoiner();
        joiner.add(u"", POSTag::vv);
        joiner.add(u"", POSTag::ef);
        EXPECT_EQ(joiner.getU16(), u"날아");
    }
}