// JNI glue that exposes the Kiwi analyzer (kiwi::Kiwi / kiwi::KiwiBuilder) to the
// Java classes "kr/pe/bab2min/Kiwi" and "kr/pe/bab2min/KiwiBuilder".
//
// NOTE(review): this copy of the file looks damaged. Line breaks are collapsed and
// the contents of every <...> appear to be missing (`#include #include`,
// `std::vector subSents`, `std::optional> tokens`, `jni::DataClassDefinition()`,
// `struct ValueBuilder : public ValueBuilder`, `.template ctor()` ...). As literally
// written, the leading #define also swallows the rest of its physical line and the
// `// To Do` comment swallows everything after it on its line. Restore the file from
// version control rather than re-typing the template arguments by hand.
//
// Layout, in order of appearance:
//
//  * `_JNI_INT64_TO_INT` is defined before "JniUtils.hpp" is included.
//    Presumably it configures how that header maps 64-bit integers - TODO confirm
//    in JniUtils.hpp.
//
//  * Plain data carriers used only by this binding:
//      - Sentence      : text, start, end, subSents, tokens (tokens is optional).
//      - JoinableToken : form, tag, inferRegularity, space - exactly the four
//                        arguments JKiwi::join() passes to joiner.add().
//
//  * gClsTokenInfo / gClsTokenResult / gClsSentence / gClsJoinableToken:
//    jni::DataClassDefinition objects that pair a C++ member pointer with the
//    property name given as a string. Names are not always the C++ member name:
//    kiwi::TokenInfo::str is published as "form", and kiwi::TokenResult::first /
//    ::second are published as "tokens" / "score".
//
//  * namespace jni: ValueBuilder specializations whose fromJava()/toJava() convert
//    kiwi::BuildOption and kiwi::Match through jint, and kiwi::POSTag and
//    kiwi::cmb::Space through jbyte. The conversions are bare C-style casts, so a
//    value arriving from Java is not range-checked here.
//    NOTE(review): consider static_cast for greppability.
//    The JClassName specializations supply the Java class names
//    "kr/pe/bab2min/Kiwi$TokenResult", "...$Token", "...$Sentence" and
//    "...$JoinableToken"; which C++ type each one belongs to is not visible in this
//    text (presumably TokenResult, TokenInfo, Sentence, JoinableToken, in that
//    order - confirm against the original).
//
//  * class JKiwi (derives from kiwi::Kiwi):
//      - analyze(text, topN, matchOption) forwards unchanged to Kiwi::analyze.
//      - splitIntoSents(text, matchOption, returnTokens) runs
//        Kiwi::analyze(text, matchOption), takes `.first`, and walks the tokens
//        once. `sentPos` starts at -1 converted to uint32_t, i.e. a value used as
//        "no sentence seen yet". Whenever token.sentPosition differs from sentPos,
//        the previous Sentence (if any) is finalized and a new one is opened.
//        A Sentence's `start` is the first token's position, `end` is the last
//        token's position + length, and `text` is text.substr(start, end - start).
//        When returnTokens is true, tokens [t, i) are moved (make_move_iterator)
//        out of the local token vector into Sentence::tokens; otherwise the
//        optional stays empty. The finalization code appears twice (inside the
//        loop and once more after it, for the last sentence).
//        NOTE(review): candidate for a small helper.
//        `subSents` is never filled - see the in-code "To Do".
//      - join(tokens) creates a joiner with Kiwi::newJoiner(), add()s every token
//        in order and returns joiner.getU16().
//
//  * class JKiwiBuilder (derives from kiwi::KiwiBuilder): build() returns
//    KiwiBuilder::build() as a JKiwi; this presumably goes through the
//    JKiwi(Kiwi&&) constructor declared above - confirm the return type of
//    kiwi::KiwiBuilder::build().
//
//  * gModule / JNI_OnLoad / JNI_OnUnload: JNI_OnLoad returns gModule.load(vm, ...),
//    passing a definition that registers a constructor plus "addWord" (twice, for
//    two overloads), "build" and "loadDictionary", a second definition that
//    registers "analyze", "splitIntoSents" and "join", and the four data-class
//    definitions. JNI_OnUnload calls gModule.unload(vm). The module is constructed
//    with JNI_VERSION_1_8.
#define _JNI_INT64_TO_INT #include "JniUtils.hpp" #include #include struct Sentence { std::u16string text; uint32_t start, end; std::vector subSents; std::optional> tokens; }; struct JoinableToken { std::u16string form; kiwi::POSTag tag; bool inferRegularity; kiwi::cmb::Space space; }; static auto gClsTokenInfo = jni::DataClassDefinition() .template property<&kiwi::TokenInfo::str>("form") .template property<&kiwi::TokenInfo::position>("position") .template property<&kiwi::TokenInfo::wordPosition>("wordPosition") .template property<&kiwi::TokenInfo::sentPosition>("sentPosition") .template property<&kiwi::TokenInfo::lineNumber>("lineNumber") .template property<&kiwi::TokenInfo::length>("length") .template property<&kiwi::TokenInfo::tag>("tag") .template property<&kiwi::TokenInfo::senseId>("senseId") .template property<&kiwi::TokenInfo::score>("score") .template property<&kiwi::TokenInfo::typoCost>("typoCost") .template property<&kiwi::TokenInfo::typoFormId>("typoFormId") .template property<&kiwi::TokenInfo::pairedToken>("pairedToken") .template property<&kiwi::TokenInfo::subSentPosition>("subSentPosition"); static auto gClsTokenResult = jni::DataClassDefinition() .template property<&kiwi::TokenResult::first>("tokens") .template property<&kiwi::TokenResult::second>("score"); static auto gClsSentence = jni::DataClassDefinition() .template property<&Sentence::text>("text") .template property<&Sentence::start>("start") .template property<&Sentence::end>("end") .template property<&Sentence::subSents>("subSents") .template property<&Sentence::tokens>("tokens"); static auto gClsJoinableToken = jni::DataClassDefinition() .template property<&JoinableToken::form>("form") .template property<&JoinableToken::tag>("tag") .template property<&JoinableToken::inferRegularity>("inferRegularity") .template property<&JoinableToken::space>("space"); namespace jni { template<> struct ValueBuilder : public ValueBuilder { using CppType = kiwi::BuildOption; using JniType = jint; CppType 
fromJava(JNIEnv* env, JniType v) { return (CppType)v; } JniType toJava(JNIEnv* env, CppType v) { return (JniType)v; } }; template<> struct ValueBuilder : public ValueBuilder { using CppType = kiwi::Match; using JniType = jint; CppType fromJava(JNIEnv* env, JniType v) { return (CppType)v; } JniType toJava(JNIEnv* env, CppType v) { return (JniType)v; } }; template<> struct ValueBuilder : public ValueBuilder { using CppType = kiwi::POSTag; using JniType = jbyte; CppType fromJava(JNIEnv* env, JniType v) { return (CppType)v; } JniType toJava(JNIEnv* env, CppType v) { return (JniType)v; } }; template<> struct ValueBuilder : public ValueBuilder { using CppType = kiwi::cmb::Space; using JniType = jbyte; CppType fromJava(JNIEnv* env, JniType v) { return (CppType)v; } JniType toJava(JNIEnv* env, CppType v) { return (JniType)v; } }; template<> struct JClassName { static constexpr auto value = std::string_view{ "kr/pe/bab2min/Kiwi$TokenResult" }; }; template<> struct ValueBuilder : public ValueBuilder { }; template<> struct JClassName { static constexpr auto value = std::string_view{ "kr/pe/bab2min/Kiwi$Token" }; }; template<> struct ValueBuilder : public ValueBuilder { }; template<> struct JClassName { static constexpr auto value = std::string_view{ "kr/pe/bab2min/Kiwi$Sentence" }; }; template<> struct ValueBuilder : public ValueBuilder { }; template<> struct JClassName { static constexpr auto value = std::string_view{ "kr/pe/bab2min/Kiwi$JoinableToken" }; }; template<> struct ValueBuilder : public ValueBuilder { }; } class JKiwi : public kiwi::Kiwi, jni::JObject { public: static constexpr std::string_view className = "kr/pe/bab2min/Kiwi"; using kiwi::Kiwi::Kiwi; JKiwi(Kiwi&& inst) : Kiwi{ std::move(inst) } {} auto analyze(const std::u16string& text, uint64_t topN, kiwi::Match matchOption) const { return Kiwi::analyze(text, topN, matchOption); } std::vector splitIntoSents(const std::u16string& text, kiwi::Match matchOption, bool returnTokens) const { std::vector ret; auto 
tokens = Kiwi::analyze(text, matchOption).first; uint32_t sentPos = -1; size_t i = 0, t = 0; for (auto& token : tokens) { if (token.sentPosition != sentPos) { if (!ret.empty()) { ret.back().text = text.substr(ret.back().start, ret.back().end - ret.back().start); if (returnTokens) { ret.back().tokens.emplace(std::make_move_iterator(tokens.begin() + t), std::make_move_iterator(tokens.begin() + i)); } } ret.emplace_back(); ret.back().start = token.position; ret.back().end = token.position + token.length; sentPos = token.sentPosition; t = i; } else { ret.back().end = token.position + token.length; } ++i; } if (!ret.empty()) { ret.back().text = text.substr(ret.back().start, ret.back().end - ret.back().start); if (returnTokens) { ret.back().tokens.emplace(std::make_move_iterator(tokens.begin() + t), std::make_move_iterator(tokens.begin() + i)); } } // To Do: process for subSents return ret; } std::u16string join(std::vector&& tokens) const { auto joiner = Kiwi::newJoiner(); for (auto& token : tokens) { joiner.add(token.form, token.tag, token.inferRegularity, token.space); } return joiner.getU16(); } }; class JKiwiBuilder : public kiwi::KiwiBuilder, jni::JObject { public: static constexpr std::string_view className = "kr/pe/bab2min/KiwiBuilder"; using kiwi::KiwiBuilder::KiwiBuilder; JKiwi build() const { return KiwiBuilder::build(); } }; jni::Module gModule{ JNI_VERSION_1_8 }; JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) { return gModule.load(vm, jni::define() .template ctor() .template method(&JKiwiBuilder::addWord)>("addWord") .template method(&JKiwiBuilder::addWord)>("addWord") .template method<&JKiwiBuilder::build>("build") .template method<&JKiwiBuilder::loadDictionary>("loadDictionary"), jni::define() .template method<&JKiwi::analyze>("analyze") .template method<&JKiwi::splitIntoSents>("splitIntoSents") .template method<&JKiwi::join>("join"), gClsTokenInfo, gClsTokenResult, gClsSentence, gClsJoinableToken ); } JNIEXPORT void JNICALL 
JNI_OnUnload(JavaVM* vm, void* reserved) { gModule.unload(vm); }