Fix spacing/formatting issues in documentation comments

Co-authored-by: bab2min <19266222+bab2min@users.noreply.github.com>
Add Doxygen documentation to PatternMatcher, TagUtils, and Mmap
2026-06-17 01:54:27 +00:00 · 2026-01-15 06:14:29 +00:00 · 2026-01-15 06:10:54 +00:00 · 2026-01-15 06:08:58 +00:00 · 2026-01-15 06:05:47 +00:00 · 2026-01-15 05:57:54 +00:00
17 changed files with 857 additions and 51 deletions
--- a/include/kiwi/CoNgramModel.h
+++ b/include/kiwi/CoNgramModel.h
@ -1,3 +1,14 @@
+/**
+ * @file CoNgramModel.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 문맥 기반 N-gram 언어 모델 (Contextual N-gram Model) 구현
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 단어 임베딩과 문맥 정보를 활용한 신경망 기반 언어 모델입니다.
+ * 전통적인 N-gram 모델보다 더 풍부한 의미 정보를 포착할 수 있습니다.
+ */
+
 #pragma once

 #include <array>
@ -15,25 +26,49 @@ namespace kiwi
 {
 	namespace lm
 	{
+		/**
+		 * @brief 문맥 N-gram 모델의 헤더 정보
+		 */
 		struct CoNgramModelHeader
 		{
-			uint64_t vocabSize, contextSize;
-			uint16_t dim;
-			uint8_t contextType, outputType;
-			uint8_t keySize, windowSize, qbit, qgroup;
-			uint64_t numNodes;
-			uint64_t nodeOffset, keyOffset, valueOffset, embOffset;
+			uint64_t vocabSize;     /**< 어휘 크기 */
+			uint64_t contextSize;   /**< 문맥 크기 */
+			uint16_t dim;           /**< 임베딩 차원 */
+			uint8_t contextType;    /**< 문맥 타입 */
+			uint8_t outputType;     /**< 출력 타입 */
+			uint8_t keySize;        /**< 키 크기 */
+			uint8_t windowSize;     /**< 윈도우 크기 */
+			uint8_t qbit;           /**< 양자화 비트 수 */
+			uint8_t qgroup;         /**< 양자화 그룹 크기 */
+			uint64_t numNodes;      /**< 노드 개수 */
+			uint64_t nodeOffset;    /**< 노드 데이터 오프셋 */
+			uint64_t keyOffset;     /**< 키 데이터 오프셋 */
+			uint64_t valueOffset;   /**< 값 데이터 오프셋 */
+			uint64_t embOffset;     /**< 임베딩 데이터 오프셋 */
 		};

+		/**
+		 * @brief 문맥 N-gram 모델의 노드 구조
+		 * 
+		 * @tparam KeyType 키 타입 (numNexts 필드의 타입)
+		 * @tparam ValueType 값 타입 (value 필드의 타입)
+		 * @tparam DiffType 하위 노드로의 오프셋(lower 필드)을 담는 타입
+		 */
 		template<class KeyType, class ValueType, class DiffType = int32_t>
 		struct Node
 		{
-			KeyType numNexts = 0;
-			ValueType value = 0;
-			DiffType lower = 0;
-			uint32_t nextOffset = 0;
+			KeyType numNexts = 0;     /**< 다음 노드의 개수 */
+			ValueType value = 0;      /**< 노드 값 */
+			DiffType lower = 0;       /**< 하위 노드로의 오프셋 */
+			uint32_t nextOffset = 0;  /**< 다음 노드들의 시작 오프셋 */
 		};

+		/**
+		 * @brief 문맥 기반 N-gram 언어 모델의 기본 클래스
+		 * 
+		 * 신경망 임베딩을 활용하여 문맥 정보를 효과적으로 활용하는 언어 모델입니다.
+		 * 단어의 의미적 유사도와 문맥 유사도를 계산할 수 있습니다.
+		 */
 		class CoNgramModelBase : public ILangModel
 		{
 		protected:
@ -49,20 +84,84 @@ namespace kiwi
 			size_t vocabSize() const override { return header.vocabSize; }
 			size_t getMemorySize() const override { return memorySize; }

+			/**
+			 * @brief 모델 헤더 정보를 반환합니다.
+			 * @return CoNgramModelHeader에 대한 const 참조
+			 */
 			const CoNgramModelHeader& getHeader() const { return header; }

+			/**
+			 * @brief 주어진 단어와 가장 유사한 단어들을 찾습니다.
+			 * @param vocabId 단어 ID
+			 * @param topN 상위 N개
+			 * @param output 결과를 저장할 배열 (단어 ID, 유사도)
+			 * @return 찾은 단어의 개수
+			 */
 			virtual size_t mostSimilarWords(uint32_t vocabId, size_t topN, std::pair<uint32_t, float>* output) const = 0;
+			
+			/**
+			 * @brief 두 단어 간의 유사도를 계산합니다.
+			 * @param vocabId1 첫 번째 단어 ID
+			 * @param vocabId2 두 번째 단어 ID
+			 * @return 유사도 점수
+			 */
 			virtual float wordSimilarity(uint32_t vocabId1, uint32_t vocabId2) const = 0;

+			/**
+			 * @brief 주어진 문맥과 가장 유사한 문맥들을 찾습니다.
+			 * @param contextId 문맥 ID
+			 * @param topN 상위 N개
+			 * @param output 결과를 저장할 배열
+			 * @return 찾은 문맥의 개수
+			 */
 			virtual size_t mostSimilarContexts(uint32_t contextId, size_t topN, std::pair<uint32_t, float>* output) const = 0;
+			
+			/**
+			 * @brief 두 문맥 간의 유사도를 계산합니다.
+			 * @param contextId1 첫 번째 문맥 ID
+			 * @param contextId2 두 번째 문맥 ID
+			 * @return 유사도 점수
+			 */
 			virtual float contextSimilarity(uint32_t contextId1, uint32_t contextId2) const = 0;

+			/**
+			 * @brief 주어진 문맥에서 예측되는 단어들을 반환합니다.
+			 * @param contextId 문맥 ID
+			 * @param topN 상위 N개
+			 * @param output 결과를 저장할 배열
+			 * @return 예측된 단어의 개수
+			 */
 			virtual size_t predictWordsFromContext(uint32_t contextId, size_t topN, std::pair<uint32_t, float>* output) const = 0;
+			
+			/**
+			 * @brief 문맥 차이를 고려하여 단어를 예측합니다.
+			 * @param contextId 문맥 ID
+			 * @param bgContextId 배경 문맥 ID
+			 * @param weight 가중치
+			 * @param topN 상위 N개
+			 * @param output 결과를 저장할 배열
+			 * @return 예측된 단어의 개수
+			 */
 			virtual size_t predictWordsFromContextDiff(uint32_t contextId, uint32_t bgContextId, float weight, size_t topN, std::pair<uint32_t, float>* output) const = 0;

+			/**
+			 * @brief 단어 ID 시퀀스를 문맥 ID로 변환합니다.
+			 * @param vocabIds 단어 ID 배열
+			 * @param size 배열 크기
+			 * @return 문맥 ID
+			 */
 			virtual uint32_t toContextId(const uint32_t* vocabIds, size_t size) const = 0;
+			
+			/**
+			 * @brief 문맥과 단어의 매핑을 반환합니다.
+			 * @return 문맥-단어 매핑 벡터
+			 */
 			virtual std::vector<std::vector<uint32_t>> getContextWordMap() const = 0;

+			/**
+			 * @brief 캐시된 문맥-단어 매핑을 반환합니다.
+			 * @return 캐시된 문맥-단어 매핑에 대한 const 참조
+			 */
 			const std::vector<std::vector<uint32_t>>& getContextWordMapCached() const
 			{
 				if (contextWordMapCache.empty())
@ -72,6 +171,16 @@ namespace kiwi
 				return contextWordMapCache;
 			}

+			/**
+			 * @brief 문맥 정의와 임베딩으로부터 모델을 빌드합니다.
+			 * @param contextDefinition 문맥 정의 파일 경로
+			 * @param embedding 임베딩 파일 경로
+			 * @param maxContextLength 최대 문맥 길이 (기본값 -1은 size_t로 변환되어 표현 가능한 최댓값이 됩니다)
+			 * @param useVLE VLE(Variable Length Encoding) 사용 여부
+			 * @param reorderContextIdx 문맥 인덱스 재정렬 여부
+			 * @param selectedEmbIdx 선택된 임베딩 인덱스
+			 * @return 빌드된 모델의 메모리 객체
+			 */
 			static utils::MemoryObject build(const std::string& contextDefinition, const std::string& embedding, 
 				size_t maxContextLength = -1, 
 				bool useVLE = true, 
--- a/include/kiwi/FrozenTrie.h
+++ b/include/kiwi/FrozenTrie.h
@ -1,4 +1,16 @@
-#pragma once
+/**
+ * @file FrozenTrie.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 메모리 효율적인 불변(immutable) Trie 자료구조 정의
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 읽기 전용 Trie 자료구조로, 빠른 문자열 검색과 패턴 매칭을 지원합니다.
+ * Aho-Corasick 알고리즘을 위한 실패 링크(fail link)를 포함합니다.
+ * 형태소 사전 검색 등에 사용됩니다.
+ */
+
+#pragma once

 #include <array>
 #include <vector>
@ -15,6 +27,10 @@ namespace kiwi
 	{
 		namespace detail
 		{
+			/**
+			 * @brief 값이 부분 매칭을 가지는지 확인하는 헬퍼 구조체
+			 * @tparam Value 값 타입
+			 */
 			template<class Value, class = void>
 			struct HasSubmatch {};

@ -66,6 +82,18 @@ namespace kiwi
 			};
 		}

+		/**
+		 * @brief 메모리 효율적인 불변(frozen) Trie 자료구조
+		 * 
+		 * 빌드 후 수정할 수 없는 Trie로, 메모리 사용량이 최적화되어 있습니다.
+		 * Aho-Corasick 알고리즘을 위한 실패 함수(fail function)를 포함하여
+		 * 다중 패턴 매칭을 효율적으로 수행할 수 있습니다.
+		 * 
+		 * @tparam _Key 키(문자) 타입
+		 * @tparam _Value 값 타입
+		 * @tparam _Diff 하위 노드로의 오프셋(Node::lower)을 담는 타입
+		 * @tparam _HasSubmatch 부분 매칭 검사 헬퍼
+		 */
 		template<class _Key, class _Value, class _Diff = int32_t, class _HasSubmatch = detail::HasSubmatch<_Value>>
 		class FrozenTrie : public _HasSubmatch
 		{
@ -74,19 +102,46 @@ namespace kiwi
 			using Value = _Value;
 			using Diff = _Diff;

+			/**
+			 * @brief Trie의 노드 구조체
+			 */
 			struct Node
 			{
-				Key numNexts = 0;
-				Diff lower = 0;
-				uint32_t nextOffset = 0;
+				Key numNexts = 0;        /**< 자식 노드의 개수 */
+				Diff lower = 0;          /**< 하위 노드로의 오프셋 */
+				uint32_t nextOffset = 0; /**< 다음 노드들의 시작 오프셋 */

+				/**
+				 * @brief 다음 문자에 해당하는 노드를 찾습니다.
+				 * @tparam arch 아키텍처 타입 (최적화를 위한)
+				 * @param ft FrozenTrie 참조
+				 * @param c 다음 문자
+				 * @return 찾은 노드 포인터, 없으면 nullptr
+				 */
 				template<ArchType arch>
 				const Node* nextOpt(const FrozenTrie& ft, Key c) const;

+				/**
+				 * @brief 실패 링크를 따라 다음 노드를 찾습니다.
+				 * @tparam arch 아키텍처 타입
+				 * @param ft FrozenTrie 참조
+				 * @param c 다음 문자
+				 * @return 찾은 노드 포인터
+				 */
 				template<ArchType arch>
 				const Node* findFail(const FrozenTrie& ft, Key c) const;

+				/**
+				 * @brief 실패 링크를 반환합니다.
+				 * @return 실패 노드 포인터
+				 */
 				const Node* fail() const;
+				
+				/**
+				 * @brief 노드의 값을 반환합니다.
+				 * @param ft FrozenTrie 참조
+				 * @return 노드의 값에 대한 const 참조
+				 */
 				const Value& val(const FrozenTrie& ft) const;
 			};
 		private:
--- a/include/kiwi/Joiner.h
+++ b/include/kiwi/Joiner.h
@ -1,3 +1,14 @@
+/**
+ * @file Joiner.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 형태소를 결합하여 문장을 재구성하는 Joiner 클래스 정의
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 형태소 분석의 역과정으로, 분석된 형태소들을 다시 원래의 문장 형태로 결합합니다.
+ * 한국어의 복잡한 음운 규칙과 철자 규칙을 고려하여 자연스러운 문장을 생성합니다.
+ */
+
 #pragma once
 #include "Types.h"
 #include "ArchUtils.h"
@ -13,13 +24,23 @@ namespace kiwi
 		class CompiledRule;
 		class AutoJoiner;

+		/**
+		 * @brief 형태소 결합 시 공백 처리 방식을 나타내는 열거형
+		 */
 		enum class Space
 		{
-			none = 0,
-			no_space = 1,
-			insert_space = 2,
+			none = 0,         /**< 공백 처리 없음 */
+			no_space = 1,     /**< 공백을 삽입하지 않음 */
+			insert_space = 2, /**< 공백을 삽입함 */
 		};

+		/**
+		 * @brief 형태소를 결합하여 문장을 재구성하는 클래스
+		 * 
+		 * 분석된 형태소들을 한국어의 음운 규칙에 따라 결합하여 
+		 * 자연스러운 문장 형태로 복원합니다.
+		 * CompiledRule을 사용하여 형태소 결합 규칙을 적용합니다.
+		 */
 		class Joiner
 		{
 			friend class CompiledRule;
@ -42,19 +63,51 @@ namespace kiwi
 			Joiner& operator=(const Joiner&);
 			Joiner& operator=(Joiner&&);

+			/**
+			 * @brief 형태소를 결합 스택에 추가합니다.
+			 * @param form 형태소의 표면형
+			 * @param tag 품사 태그
+			 * @param space 공백 처리 방식
+			 */
 			void add(const std::u16string& form, POSTag tag, Space space = Space::none);
+			
+			/**
+			 * @brief 형태소를 결합 스택에 추가합니다.
+			 * @param form 형태소의 표면형 (C 문자열)
+			 * @param tag 품사 태그
+			 * @param space 공백 처리 방식
+			 */
 			void add(const char16_t* form, POSTag tag, Space space = Space::none);

+			/**
+			 * @brief 결합된 결과를 UTF-16 문자열로 반환합니다.
+			 * @param rangesOut 각 형태소의 문자 위치 범위를 저장할 벡터 (선택 사항)
+			 * @return 결합된 UTF-16 문자열
+			 */
 			std::u16string getU16(std::vector<std::pair<uint32_t, uint32_t>>* rangesOut = nullptr) const;
+			
+			/**
+			 * @brief 결합된 결과를 UTF-8 문자열로 반환합니다.
+			 * @param rangesOut 각 형태소의 바이트 위치 범위를 저장할 벡터 (선택 사항)
+			 * @return 결합된 UTF-8 문자열
+			 */
 			std::string getU8(std::vector<std::pair<uint32_t, uint32_t>>* rangesOut = nullptr) const;
 		};

+		/**
+		 * @brief 언어 모델을 사용한 형태소 결합 후보
+		 * 
+		 * 여러 가능한 결합 방식 중 가장 확률이 높은 것을 선택하기 위해
+		 * 언어 모델 상태와 점수를 함께 관리합니다.
+		 * 
+		 * @tparam LmState 언어 모델 상태 타입
+		 */
 		template<class LmState>
 		struct Candidate
 		{
-			Joiner joiner;
-			LmState lmState;
-			float score = 0;
+			Joiner joiner;      /**< 형태소 결합기 */
+			LmState lmState;    /**< 언어 모델 상태 */
+			float score = 0;    /**< 현재까지의 누적 점수 */

 			Candidate(const CompiledRule& _cr, const lm::ILangModel* lm)
 				: joiner{ _cr }, lmState{ lm }
@ -62,10 +115,17 @@ namespace kiwi
 			}
 		};

+		/**
+		 * @brief VoidLangModel을 위한 Candidate 특수화
+		 * 
+		 * 언어 모델을 사용하지 않는 경우의 후보입니다.
+		 * 
+		 * @tparam arch 아키텍처 타입
+		 */
 		template<ArchType arch>
 		struct Candidate<lm::VoidState<arch>>
 		{
-			Joiner joiner;
+			Joiner joiner;  /**< 형태소 결합기 */

 			Candidate(const CompiledRule& _cr, const lm::ILangModel* lm)
 				: joiner{ _cr }
@ -73,6 +133,12 @@ namespace kiwi
 			}
 		};

+		/**
+		 * @brief 타입이 지워진 벡터 컨테이너
+		 * 
+		 * 템플릿 타입 정보를 런타임에 관리하기 위한 타입 소거(type erasure) 벡터입니다.
+		 * 다양한 타입의 Candidate를 동일한 방식으로 저장하고 관리할 수 있게 합니다.
+		 */
 		class ErasedVector
 		{
 			using FnDestruct = void(*)(ErasedVector*);
@ -161,6 +227,13 @@ namespace kiwi
 			}
 		};

+		/**
+		 * @brief 자동으로 형태소를 결합하는 클래스
+		 * 
+		 * 언어 모델을 활용하여 여러 가능한 결합 방식 중 
+		 * 가장 확률이 높은 결합을 자동으로 선택합니다.
+		 * 형태소 추가 시 언어 모델 점수를 고려하여 최적의 후보를 유지합니다.
+		 */
 		class AutoJoiner
 		{
 			friend class kiwi::Kiwi;
@ -201,12 +274,51 @@ namespace kiwi
 			AutoJoiner& operator=(const AutoJoiner&);
 			AutoJoiner& operator=(AutoJoiner&&);

+			/**
+			 * @brief 형태소 ID로 형태소를 추가합니다.
+			 * @param morphemeId 형태소 인덱스
+			 * @param space 공백 처리 방식
+			 */
 			void add(size_t morphemeId, Space space = Space::none);
+			
+			/**
+			 * @brief 형태소를 추가합니다 (StringView).
+			 * @param form 형태소의 표면형
+			 * @param tag 품사 태그
+			 * @param space 공백 처리 방식
+			 */
 			void add(U16StringView form, POSTag tag, Space space = Space::none);
+			
+			/**
+			 * @brief 형태소를 추가합니다 (u16string).
+			 * @param form 형태소의 표면형
+			 * @param tag 품사 태그
+			 * @param inferRegularity 규칙 활용 자동 추론 여부
+			 * @param space 공백 처리 방식
+			 */
 			void add(const std::u16string& form, POSTag tag, bool inferRegularity = true, Space space = Space::none);
+			
+			/**
+			 * @brief 형태소를 추가합니다 (C 문자열).
+			 * @param form 형태소의 표면형
+			 * @param tag 품사 태그
+			 * @param inferRegularity 규칙 활용 자동 추론 여부
+			 * @param space 공백 처리 방식
+			 */
 			void add(const char16_t* form, POSTag tag, bool inferRegularity = true, Space space = Space::none);

+			/**
+			 * @brief 결합된 결과를 UTF-16 문자열로 반환합니다.
+			 * @param rangesOut 각 형태소의 문자 위치 범위를 저장할 벡터 (선택 사항)
+			 * @return 결합된 UTF-16 문자열
+			 */
 			std::u16string getU16(std::vector<std::pair<uint32_t, uint32_t>>* rangesOut = nullptr) const;
+			
+			/**
+			 * @brief 결합된 결과를 UTF-8 문자열로 반환합니다.
+			 * @param rangesOut 각 형태소의 바이트 위치 범위를 저장할 벡터 (선택 사항)
+			 * @return 결합된 UTF-8 문자열
+			 */
 			std::string getU8(std::vector<std::pair<uint32_t, uint32_t>>* rangesOut = nullptr) const;
 		};
 	}
--- a/include/kiwi/Knlm.h
+++ b/include/kiwi/Knlm.h
@ -1,4 +1,16 @@
-#pragma once
+/**
+ * @file Knlm.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief Kneser-Ney 언어 모델 구현
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * Kneser-Ney 스무딩을 사용한 N-gram 언어 모델을 구현합니다.
+ * 형태소 분석 시 가장 가능성 높은 형태소 시퀀스를 선택하는 데 사용됩니다.
+ * 압축과 양자화를 지원하여 메모리 효율적인 모델을 제공합니다.
+ */
+
+#pragma once

 #include "LangModel.h"

@ -6,23 +18,55 @@ namespace kiwi
 {
 	namespace lm
 	{
+		/**
+		 * @brief Kneser-Ney 언어 모델의 헤더 정보
+		 * 
+		 * 모델의 메타데이터와 각 데이터 섹션의 오프셋을 저장합니다.
+		 */
 		struct KnLangModelHeader
 		{
-			uint64_t num_nodes, node_offset, key_offset, ll_offset, gamma_offset, qtable_offset, htx_offset;
-			uint64_t unk_id, bos_id, eos_id, vocab_size;
-			uint8_t order, key_size, diff_size, quantized;
-			uint32_t extra_buf_size;
+			uint64_t num_nodes;       /**< 노드의 총 개수 */
+			uint64_t node_offset;     /**< 노드 데이터의 시작 오프셋 */
+			uint64_t key_offset;      /**< 키 데이터의 시작 오프셋 */
+			uint64_t ll_offset;       /**< 로그 우도(log-likelihood) 데이터의 시작 오프셋 */
+			uint64_t gamma_offset;    /**< 감마(백오프 가중치) 데이터의 시작 오프셋 */
+			uint64_t qtable_offset;   /**< 양자화 테이블의 시작 오프셋 */
+			uint64_t htx_offset;      /**< 히스토리 변환 데이터의 시작 오프셋 */
+			uint64_t unk_id;          /**< 미등록어(unknown) ID */
+			uint64_t bos_id;          /**< 문장 시작(beginning of sentence) ID */
+			uint64_t eos_id;          /**< 문장 종료(end of sentence) ID */
+			uint64_t vocab_size;      /**< 어휘 크기 */
+			uint8_t order;            /**< N-gram 차수 */
+			uint8_t key_size;         /**< 키의 크기 (바이트) */
+			uint8_t diff_size;        /**< diff 값의 크기 (바이트) */
+			uint8_t quantized;        /**< 양자화 여부 */
+			uint32_t extra_buf_size;  /**< 추가 버퍼 크기 */
 		};

+		/**
+		 * @brief Kneser-Ney 언어 모델의 노드 구조
+		 * 
+		 * 각 N-gram을 표현하는 트리 노드입니다.
+		 * 
+		 * @tparam KeyType 키의 타입 (어휘 인덱스, num_nexts 필드의 타입)
+		 * @tparam DiffType 하위 노드로의 오프셋(lower 필드)을 담는 타입
+		 */
 		template<class KeyType, class DiffType = int32_t>
 		struct KnLangModelNode
 		{
-			KeyType num_nexts = 0;
-			DiffType lower = 0;
-			uint32_t next_offset = 0;
-			float ll = 0, gamma = 0;
+			KeyType num_nexts = 0;     /**< 다음 노드의 개수 */
+			DiffType lower = 0;        /**< 하위 노드로의 오프셋 */
+			uint32_t next_offset = 0;  /**< 다음 노드들의 시작 오프셋 */
+			float ll = 0;              /**< 로그 우도 */
+			float gamma = 0;           /**< 백오프 가중치 */
 		};

+		/**
+		 * @brief Kneser-Ney 언어 모델의 기본 클래스
+		 * 
+		 * 모든 Kneser-Ney 언어 모델 구현의 베이스 클래스입니다.
+		 * 메모리 매핑된 모델 데이터를 관리하고 N-gram 확률 계산을 제공합니다.
+		 */
 		class KnLangModelBase : public ILangModel
 		{
 		protected:
@ -44,13 +88,34 @@ namespace kiwi
 			size_t vocabSize() const override { return getHeader().vocab_size; }
 			size_t getMemorySize() const override { return base.size(); }

+			/**
+			 * @brief 모델 헤더 정보를 반환합니다.
+			 * @return KnLangModelHeader에 대한 const 참조
+			 */
 			const KnLangModelHeader& getHeader() const { return *reinterpret_cast<const KnLangModelHeader*>(base.get()); }

+			/**
+			 * @brief 하위 노드의 인덱스를 반환합니다.
+			 * @param node_idx 현재 노드 인덱스
+			 * @return 하위 노드 인덱스
+			 */
 			virtual ptrdiff_t getLowerNode(ptrdiff_t node_idx) const = 0;

 			virtual size_t nonLeafNodeSize() const = 0;
+			
+			/**
+			 * @brief 추가 버퍼를 반환합니다.
+			 * @return 추가 버퍼 포인터
+			 */
 			virtual const void* getExtraBuf() const = 0;

+			/**
+			 * @brief 메모리로부터 Kneser-Ney 언어 모델을 생성합니다.
+			 * @param mem 모델 데이터가 담긴 메모리 객체
+			 * @param archType 아키텍처 타입 (최적화를 위한)
+			 * @param transposed 전치 여부
+			 * @return 생성된 언어 모델의 unique_ptr
+			 */
 			static std::unique_ptr<KnLangModelBase> create(utils::MemoryObject&& mem, ArchType archType = ArchType::none, bool transposed = false);

 			template<class VocabTy, class Trie, class HistoryTx = std::vector<VocabTy>>
@ -64,14 +129,30 @@ namespace kiwi
 				size_t extra_buf_size = 0
 			);

+			/**
+			 * @brief 메모리 객체를 반환합니다.
+			 * @return 모델 데이터가 담긴 메모리 객체에 대한 const 참조
+			 */
 			const utils::MemoryObject& getMemory() const { return base; }

+			/**
+			 * @brief 다음 토큰으로 상태를 진행하고 로그 확률을 반환합니다.
+			 * @param node_idx 현재 노드 인덱스 (참조로 업데이트됨)
+			 * @param next 다음 토큰
+			 * @return 로그 확률
+			 */
 			template<class Ty>
 			float progress(ptrdiff_t& node_idx, Ty next) const
 			{
 				return _progress(node_idx, next);
 			}

+			/**
+			 * @brief 토큰 시퀀스를 평가하여 로그 확률을 계산합니다.
+			 * @param in_first 입력 시퀀스의 시작 반복자
+			 * @param in_last 입력 시퀀스의 끝 반복자
+			 * @param out_first 출력 확률을 저장할 시작 반복자
+			 */
 			template<class InTy, class OutTy>
 			void evaluate(InTy in_first, InTy in_last, OutTy out_first) const
 			{
@ -96,6 +177,13 @@ namespace kiwi
 				}
 			}

+			/**
+			 * @brief 토큰 시퀀스의 총 로그 확률을 계산합니다.
+			 * @param in_first 입력 시퀀스의 시작 반복자
+			 * @param in_last 입력 시퀀스의 끝 반복자
+			 * @param min_score 최소 점수 임계값
+			 * @return 총 로그 확률
+			 */
 			template<class InTy>
 			float sum(InTy in_first, InTy in_last, float min_score = -100) const
 			{
@ -108,6 +196,12 @@ namespace kiwi
 				return ret;
 			}

+			/**
+			 * @brief 주어진 히스토리에 대한 다음 토큰들의 로그 확률을 반환합니다.
+			 * @param in_first 히스토리 시퀀스의 시작 반복자
+			 * @param in_last 히스토리 시퀀스의 끝 반복자
+			 * @return 모든 다음 토큰의 로그 확률 벡터
+			 */
 			template<class InTy>
 			std::vector<float> getNextLL(InTy in_first, InTy in_last) const
 			{
--- a/include/kiwi/LangModel.h
+++ b/include/kiwi/LangModel.h
@ -1,3 +1,14 @@
+/**
+ * @file LangModel.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 언어 모델 인터페이스 및 기본 구현을 정의하는 헤더 파일
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 이 파일은 형태소 분석에서 사용되는 언어 모델의 인터페이스를 정의합니다.
+ * 언어 모델은 형태소 시퀀스의 확률을 계산하여 가장 가능성 높은 분석 결과를 선택하는 데 사용됩니다.
+ */
+
 #pragma once

 #include <array>
@ -16,21 +27,61 @@ namespace kiwi
 {
 	namespace lm
 	{
+		/**
+		 * @brief 언어 모델의 기본 인터페이스
+		 * 
+		 * 모든 언어 모델 구현체가 상속해야 하는 추상 인터페이스입니다.
+		 * 형태소 분석 과정에서 각 형태소 시퀀스의 확률을 계산하는데 사용됩니다.
+		 */
 		class ILangModel
 		{
 		public:
 			virtual ~ILangModel() = default;
+			/**
+			 * @brief 언어 모델의 타입을 반환합니다.
+			 * @return 언어 모델 타입 (none, knlm, sbg 등)
+			 */
 			virtual ModelType getType() const = 0;
+			/**
+			 * @brief 언어 모델의 어휘 크기를 반환합니다.
+			 * @return 어휘(vocabulary)에 포함된 형태소의 개수
+			 */
 			virtual size_t vocabSize() const = 0;
+			/**
+			 * @brief 언어 모델이 사용하는 메모리 크기를 반환합니다.
+			 * @return 메모리 사용량 (바이트 단위)
+			 */
 			virtual size_t getMemorySize() const = 0;

+			/**
+			 * @brief 최적 경로 탐색 함수 포인터를 반환합니다.
+			 * @return 최적 경로 탐색에 사용되는 함수 포인터
+			 */
 			virtual void* getFindBestPathFn() const = 0;
+			/**
+			 * @brief 새로운 Joiner 생성 함수 포인터를 반환합니다.
+			 * @return Joiner 생성에 사용되는 함수 포인터
+			 */
 			virtual void* getNewJoinerFn() const = 0;
 		};

+		/**
+		 * @brief 언어 모델 상태의 베이스 템플릿
+		 * 
+		 * CRTP(Curiously Recurring Template Pattern)를 사용하여
+		 * 파생 클래스의 구현을 정적으로 디스패치합니다.
+		 * 
+		 * @tparam DerivedLM 파생된 언어 모델 클래스
+		 */
 		template<class DerivedLM>
 		struct LmStateBase
 		{
+			/**
+			 * @brief 다음 토큰에 대한 확률을 계산하고 상태를 업데이트합니다.
+			 * @param langMdl 언어 모델 포인터
+			 * @param nextToken 다음 토큰
+			 * @return 다음 토큰의 로그 확률
+			 */
 			float next(const ILangModel* langMdl, typename DerivedLM::VocabType nextToken)
 			{
 				using LmStateType = typename DerivedLM::LmStateType;
@ -41,6 +92,14 @@ namespace kiwi
 		template<ArchType arch>
 		class VoidLangModel;

+		/**
+		 * @brief VoidLangModel의 상태 클래스
+		 * 
+		 * 언어 모델을 사용하지 않을 때 사용되는 더미 상태입니다.
+		 * 항상 0의 점수(로그 확률 단위)를 반환합니다.
+		 * 
+		 * @tparam arch 아키텍처 타입
+		 */
 		template<ArchType arch>
 		struct VoidState : public LmStateBase<VoidLangModel<arch>>
 		{
@ -55,6 +114,14 @@ namespace kiwi
 			}
 		};

+		/**
+		 * @brief 언어 모델을 사용하지 않는 더미 언어 모델
+		 * 
+		 * 언어 모델 없이 형태소 분석을 수행할 때 사용됩니다.
+		 * 모든 점수(로그 확률) 계산에서 0을 반환하여 언어 모델의 영향을 받지 않습니다.
+		 * 
+		 * @tparam arch 아키텍처 타입
+		 */
 		template<ArchType arch>
 		class VoidLangModel : public ILangModel
 		{
--- a/include/kiwi/Mmap.h
+++ b/include/kiwi/Mmap.h
@ -1,4 +1,15 @@
-#pragma once
+/**
+ * @file Mmap.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 메모리 맵 파일 입출력 클래스
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 대용량 파일을 메모리에 매핑하여 효율적으로 읽고 쓰는 기능을 제공합니다.
+ * 언어 모델, 사전 데이터 등을 빠르게 로드하는 데 사용됩니다.
+ */
+
+#pragma once
 #include <string>
 #include <iostream>
 #include <fstream>
@ -12,6 +23,9 @@ namespace kiwi
 	{
 		namespace detail
 		{
+			/**
+			 * @brief Windows 핸들을 자동으로 관리하는 RAII 가드
+			 */
 			class HandleGuard
 			{
 				HANDLE handle = nullptr;
--- a/include/kiwi/PatternMatcher.h
+++ b/include/kiwi/PatternMatcher.h
@ -1,3 +1,14 @@
+/**
+ * @file PatternMatcher.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 텍스트 패턴 매칭 및 매칭 옵션 정의
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * URL, 이메일, 해시태그, 멘션 등의 특수 패턴을 인식하고
+ * 형태소 분석 시 적용할 옵션들을 정의합니다.
+ */
+
 #pragma once

 #include <vector>
@ -7,6 +18,11 @@

 namespace kiwi
 {
+	/**
+	 * @brief 형태소 분석 시 패턴 매칭 및 처리 옵션
+	 * 
+	 * 비트 플래그로 사용되어 여러 옵션을 조합할 수 있습니다.
+	 */
 	enum class Match : size_t
 	{
 		none = 0,
@ -27,12 +43,20 @@ namespace kiwi
 		compatibleJamo = 1 << 24, /**< 출력시 한글 첫가끝 자모를 호환가능한 자모로 변환한다. */
 		splitSaisiot = 1 << 25, /**< 사이시옷이 포함된 합성명사를 분리하여 매칭한다. */
 		mergeSaisiot = 1 << 26, /**< 사이시옷이 포함된 것으로 추정되는 명사를 결합하여 매칭한다. */
-		joinVSuffix = joinVerbSuffix | joinAdjSuffix,
-		joinAffix = joinNounPrefix | joinNounSuffix | joinVerbSuffix | joinAdjSuffix | joinAdvSuffix,
-		all = url | email | hashtag | mention | serial | emoji | zCoda,
-		allWithNormalizing = all | normalizeCoda,
+		joinVSuffix = joinVerbSuffix | joinAdjSuffix, /**< 용언 파생접미사 결합 */
+		joinAffix = joinNounPrefix | joinNounSuffix | joinVerbSuffix | joinAdjSuffix | joinAdvSuffix, /**< 모든 접사 결합 */
+		all = url | email | hashtag | mention | serial | emoji | zCoda, /**< url, email, hashtag, mention, serial, emoji, zCoda 옵션을 모두 켠 조합 */
+		allWithNormalizing = all | normalizeCoda, /**< all에 normalizeCoda를 더한 조합 */
 	};

+	/**
+	 * @brief 텍스트 패턴을 매칭합니다.
+	 * @param left 왼쪽 문자 (문맥)
+	 * @param first 텍스트 시작 포인터
+	 * @param last 텍스트 끝 포인터
+	 * @param matchOptions 매칭 옵션
+	 * @return (매칭된 길이, 품사 태그) 쌍
+	 */
 	std::pair<size_t, kiwi::POSTag> matchPattern(char16_t left, const char16_t* first, const char16_t* last, Match matchOptions);
 }

--- a/include/kiwi/SkipBigramModel.h
+++ b/include/kiwi/SkipBigramModel.h
@ -1,3 +1,15 @@
+/**
+ * @file SkipBigramModel.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief Skip-bigram 언어 모델 구현
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * Skip-bigram은 인접하지 않은 두 단어 사이의 관계를 학습하는 언어 모델입니다.
+ * 일반적인 bigram이 연속된 두 단어만을 고려하는 것과 달리,
+ * 중간에 다른 단어가 있어도 두 단어 사이의 관계를 포착할 수 있습니다.
+ */
+
 #pragma once

 #include "Knlm.h"
@ -6,12 +18,25 @@ namespace kiwi
 {
 	namespace lm
 	{
+		/**
+		 * @brief Skip-bigram 모델의 헤더 정보
+		 */
 		struct SkipBigramModelHeader
 		{
-			uint64_t vocabSize;
-			uint8_t keySize, windowSize, compressed, quantize, _rsv[4];
+			uint64_t vocabSize;    /**< 어휘 크기 */
+			uint8_t keySize;       /**< 키의 크기 */
+			uint8_t windowSize;    /**< 윈도우 크기 (skip 거리) */
+			uint8_t compressed;    /**< 압축 여부 */
+			uint8_t quantize;      /**< 양자화 비트 수 */
+			uint8_t _rsv[4];       /**< 예약 필드 */
 		};

+		/**
+		 * @brief Skip-bigram 언어 모델의 기본 클래스
+		 * 
+		 * 중간 단어를 건너뛰며 단어 간 관계를 학습하는 언어 모델입니다.
+		 * 긴 거리 의존성을 포착하여 더 정확한 형태소 분석을 가능하게 합니다.
+		 */
 		class SkipBigramModelBase : public ILangModel
 		{
 		protected:
@ -25,8 +50,19 @@ namespace kiwi
 			size_t vocabSize() const override { return getHeader().vocabSize; }
 			ModelType getType() const override { return ModelType::sbg; }

+			/**
+			 * @brief 모델 헤더 정보를 반환합니다.
+			 * @return SkipBigramModelHeader에 대한 const 참조
+			 */
 			const SkipBigramModelHeader& getHeader() const { return *reinterpret_cast<const SkipBigramModelHeader*>(base.get()); }

+			/**
+			 * @brief 메모리로부터 Skip-bigram 모델을 생성합니다.
+			 * @param knlmMem Kneser-Ney 언어 모델 메모리
+			 * @param sbgMem Skip-bigram 모델 메모리
+			 * @param archType 아키텍처 타입 (최적화를 위한)
+			 * @return 생성된 Skip-bigram 모델의 unique_ptr
+			 */
 			static std::unique_ptr<SkipBigramModelBase> create(utils::MemoryObject&& knlmMem, utils::MemoryObject&& sbgMem, ArchType archType = ArchType::none);
 		};
 	}
--- a/include/kiwi/TagUtils.h
+++ b/include/kiwi/TagUtils.h
@ -1,3 +1,13 @@
+/**
+ * @file TagUtils.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 품사 태그 관련 유틸리티 함수 및 클래스
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 품사 태그의 분류, 검사, 점수 계산 등을 위한 유틸리티를 제공합니다.
+ */
+
 #pragma once

 #include <algorithm>
@ -5,49 +15,107 @@

 namespace kiwi
 {
+	/**
+	 * @brief 품사 태그 시퀀스의 점수를 계산하는 클래스
+	 * 
+	 * 형태소 분석 결과의 자연스러움을 평가하기 위해
+	 * 품사 태그 시퀀스에 점수를 부여합니다.
+	 * 특히 어절 경계에서의 품사 조합을 평가합니다.
+	 */
 	class TagSequenceScorer
 	{
 		float leftBoundaryScores[2][(size_t)POSTag::max] = { { 0, }, };
 	public:
-		float weight;
+		float weight; /**< 점수 가중치 */

+		/**
+		 * @brief TagSequenceScorer 생성자
+		 * @param _weight 점수 가중치 (기본값: 5)
+		 */
 		TagSequenceScorer(float _weight = 5);

+		/**
+		 * @brief 왼쪽 경계에서의 품사 점수를 계산합니다.
+		 * @param hasLeftBoundary 왼쪽에 어절 경계가 있는지 여부
+		 * @param right 오른쪽 품사 태그
+		 * @return clearIrregular(right)에 해당하는 경계 점수에 weight를 곱한 값
+		 */
 		float evalLeftBoundary(bool hasLeftBoundary, POSTag right) const
 		{
 			return leftBoundaryScores[hasLeftBoundary ? 1 : 0][(size_t)clearIrregular(right)] * weight;
 		}
 	};

+	/**
+	 * @brief 품사가 체언류인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 체언류(명사, 대명사, 수사)이면 true
+	 */
 	bool isNounClass(POSTag tag);
+	
+	/**
+	 * @brief 품사가 용언류인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 용언류(동사, 형용사)이면 true
+	 */
 	bool isVerbClass(POSTag tag);
 	
+	/**
+	 * @brief 품사가 어미류인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 어미류이면 true
+	 */
 	inline bool isEClass(POSTag tag)
 	{
 		return POSTag::ep <= tag && tag <= POSTag::etm;
 	}
 	
+	/**
+	 * @brief 품사가 조사류인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 조사류이면 true
+	 */
 	inline bool isJClass(POSTag tag)
 	{
 		return POSTag::jks <= tag && tag <= POSTag::jc;
 	}

+	/**
+	 * @brief 품사가 명사류(nng ~ nnb)인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return POSTag::nng 이상 POSTag::nnb 이하의 태그이면 true
+	 */
 	inline bool isNNClass(POSTag tag)
 	{
 		return POSTag::nng <= tag && tag <= POSTag::nnb;
 	}

+	/**
+	 * @brief 품사가 파생접미사인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 파생접미사이면 true
+	 */
 	inline bool isSuffix(POSTag tag)
 	{
 		tag = clearIrregular(tag);
 		return POSTag::xsn <= tag && tag <= POSTag::xsm;
 	}
 	
+	/**
+	 * @brief 품사가 특수문자류인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 특수문자류이면 true
+	 */
 	inline bool isSpecialClass(POSTag tag)
 	{
 		return POSTag::sf <= tag && tag <= POSTag::sn;
 	}

+	/**
+	 * @brief 품사가 사용자 정의 품사인지 확인합니다.
+	 * @param tag 품사 태그
+	 * @return 사용자 정의 품사이면 true
+	 */
 	inline bool isUserClass(POSTag tag)
 	{
 		return POSTag::user0 <= tag && tag <= POSTag::user4;
--- a/include/kiwi/ThreadPool.h
+++ b/include/kiwi/ThreadPool.h
@ -1,4 +1,18 @@
-#pragma once
+/**
+ * @file ThreadPool.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 간단한 C++11 Thread Pool 구현
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * A simple C++11 Thread Pool implementation(https://github.com/progschj/ThreadPool)
+ * modified by bab2min to have additional parameter threadId
+ * 
+ * 멀티스레딩을 위한 작업 큐와 워커 스레드 풀을 제공합니다.
+ * 형태소 분석, 단어 추출 등의 병렬 처리에 사용됩니다.
+ */
+
+#pragma once

 /*
 A simple C++11 Thread Pool implementation(https://github.com/progschj/ThreadPool)
@ -19,18 +33,53 @@ namespace kiwi
 {
 	namespace utils
 	{
+		/**
+		 * @brief 작업을 병렬로 처리하는 스레드 풀
+		 * 
+		 * 고정된 수의 워커 스레드를 생성하고,
+		 * 작업을 큐에 넣어 스레드들이 병렬로 처리하도록 합니다.
+		 */
 		class ThreadPool
 		{
 		public:
+			/**
+			 * @brief ThreadPool 생성자
+			 * @param threads 워커 스레드의 개수 (0이면 스레드 없이 직렬 처리)
+			 * @param maxQueued 최대 큐 크기 (0이면 무제한)
+			 */
 			ThreadPool(size_t threads = 0, size_t maxQueued = 0);
 			~ThreadPool();

+			/**
+			 * @brief 작업을 큐에 추가합니다.
+			 * 
+			 * 작업 함수는 첫 번째 인자로 스레드 ID를 받습니다.
+			 * 
+			 * @tparam F 함수 타입
+			 * @tparam Args 인자 타입들
+			 * @param f 실행할 함수
+			 * @param args 함수에 전달할 인자들
+			 * @return 작업 결과를 받을 수 있는 future
+			 */
 			template<class F, class... Args>
 			auto enqueue(F&& f, Args&&... args)
 				->std::future<typename std::invoke_result<F, size_t, Args...>::type>;

+			/**
+			 * @brief 스레드 풀의 크기를 반환합니다.
+			 * @return 워커 스레드의 개수
+			 */
 			size_t size() const { return workers.size(); }
+			
+			/**
+			 * @brief 큐에 있는 작업의 개수를 반환합니다.
+			 * @return 대기 중인 작업 개수
+			 */
 			size_t numEnqueued() const { return tasks.size(); }
+			
+			/**
+			 * @brief 모든 작업이 완료될 때까지 기다립니다.
+			 */
 			void joinAll();
 		private:
 			std::vector<std::thread> workers;
--- a/include/kiwi/Utils.h
+++ b/include/kiwi/Utils.h
@ -1,4 +1,15 @@
-#pragma once
+/**
+ * @file Utils.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 유틸리티 함수 및 헬퍼 함수 모음
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * UTF-8/UTF-16 인코딩 변환, 한글 처리, 품사 태그 변환 등
+ * 다양한 유틸리티 함수들을 제공합니다.
+ */
+
+#pragma once
 #include <iostream>
 #include <string>
 #include <memory>
@ -30,25 +41,88 @@ namespace kiwi
 		return std::unique_ptr<T>(new typename std::remove_extent<T>::type[size]);
 	}

+	/**
+	 * @brief UTF-8 문자열을 UTF-16 문자열로 변환합니다.
+	 * @param str UTF-8 문자열
+	 * @return 변환된 UTF-16 문자열
+	 */
 	std::u16string utf8To16(const std::string& str);
+	
+	/**
+	 * @brief UTF-8 문자열을 UTF-16으로 변환하고 바이트 위치를 추적합니다.
+	 * @param str UTF-8 문자열
+	 * @param bytePositions UTF-8 바이트 위치를 저장할 벡터
+	 * @return 변환된 UTF-16 문자열
+	 */
 	std::u16string utf8To16(const std::string& str, std::vector<size_t>& bytePositions);
+	
+	/**
+	 * @brief 유니코드 코드포인트를 UTF-8 문자열로 변환합니다.
+	 * @param code 유니코드 코드포인트
+	 * @return UTF-8 문자열
+	 */
 	std::string utf8FromCode(char32_t code);
+	
 	size_t utf8FromCode(std::string& ret, char32_t code);
+	
+	/**
+	 * @brief UTF-16 문자열을 UTF-8 문자열로 변환합니다.
+	 * @param str UTF-16 문자열
+	 * @return 변환된 UTF-8 문자열
+	 */
 	std::string utf16To8(const std::u16string& str);
+	
+	/**
+	 * @brief 한글 문자열을 정규화합니다.
+	 * @param hangul 한글 문자열
+	 * @return 정규화된 한글 문자열
+	 */
 	KString normalizeHangul(const std::u16string& hangul);

+	/**
+	 * @brief 품사 태그가 웹 관련 태그인지 확인합니다.
+	 * @param t 품사 태그
+	 * @return POSTag::w_url 이상 POSTag::w_emoji 이하의 웹 관련 태그이면 true
+	 */
 	inline bool isWebTag(POSTag t)
 	{
 		return POSTag::w_url <= t && t <= POSTag::w_emoji;
 	}

+	/**
+	 * @brief 문자열을 품사 태그로 변환합니다.
+	 * @param tagStr 품사 태그 문자열
+	 * @return 품사 태그 열거형
+	 */
 	POSTag toPOSTag(const std::u16string& tagStr);
+	
+	/**
+	 * @brief 품사 태그를 문자열로 변환합니다.
+	 * @param t 품사 태그
+	 * @return 품사 태그 문자열
+	 */
 	const char* tagToString(POSTag t);
+	
+	/**
+	 * @brief 품사 태그를 한글 문자열로 변환합니다.
+	 * @param t 품사 태그
+	 * @return 품사 태그 한글 문자열
+	 */
 	const kchar_t* tagToKString(POSTag t);
 	
 	const char* tagRToString(char16_t form, POSTag t);
 	const kchar_t* tagRToKString(char16_t form, POSTag t);

+	/**
+	 * @brief 값이 범위 내에 있는지 확인합니다.
+	 * @tparam A 값의 타입
+	 * @tparam B 하한의 타입
+	 * @tparam C 상한의 타입
+	 * @param value 확인할 값
+	 * @param lower 하한 (포함)
+	 * @param upper 상한 (미포함)
+	 * @return lower <= value < upper이면 true
+	 */
 	template<class A, class B, class C>
 	inline bool within(A value, B lower, C upper)
 	{
@ -61,41 +135,82 @@ namespace kiwi
 		return cont.data() <= value && value < cont.data() + cont.size();
 	}

+	/**
+	 * @brief Checks whether a character is a precomposed Hangul syllable.
+	 * @param chr Character to test
+	 * @return true if chr is in the Hangul syllable range U+AC00 ('가') through U+D7A3 ('힣'); 0xD7A4 is the exclusive upper bound passed to within
+	 */
 	inline bool isHangulSyllable(char16_t chr)
 	{
 		return within(chr, 0xAC00, 0xD7A4);
 	}

+	/**
+	 * @brief Checks whether a character is a Hangul onset (initial consonant) jamo.
+	 * @param chr Character to test
+	 * @return true if chr is one of the 19 code points starting at U+1100, i.e. in the half-open range [0x1100, 0x1113)
+	 */
 	inline bool isHangulOnset(char16_t chr)
 	{
 		return within(chr, 0x1100, 0x1100 + 19);
 	}

+	/**
+	 * @brief Checks whether a character is a Hangul coda (final consonant) jamo.
+	 * @param chr Character to test
+	 * @return true if chr is one of the 27 code points starting at U+11A8, i.e. in the half-open range [0x11A8, 0x11C3)
+	 */
 	inline bool isHangulCoda(char16_t chr)
 	{
 		return within(chr, 0x11A8, 0x11A8 + 27);
 	}

+	/**
+	 * @brief Checks whether a character is a Hangul vowel letter.
+	 * @param chr Character to test
+	 * @return true if chr is in the half-open range [0x314F, 0x3164); these are Hangul Compatibility Jamo vowel letters (U+314F..U+3163), not the conjoining medial jamo that start at U+1161
+	 */
 	inline bool isHangulVowel(char16_t chr)
 	{
 		return within(chr, 0x314F, 0x3164);
 	}

+	/**
+	 * @brief Builds a Hangul syllable code unit from an onset index and a vowel index (no coda term is added).
+	 * @param onset Onset (initial consonant) index
+	 * @param vowel Vowel (medial) index
+	 * @return 0xAC00 + (onset * 21 + vowel) * 28; the indices are not range-checked and the offset is truncated to char16_t
+	 */
 	inline char16_t joinOnsetVowel(size_t onset, size_t vowel)
 	{
 		return 0xAC00 + (char16_t)((onset * 21 + vowel) * 28);
 	}

+	/**
+	 * @brief Extracts the vowel (medial) index from a Hangul syllable.
+	 * @param chr Hangul syllable; the input is not validated, so the result is only meaningful for code units at or above 0xAC00 that are real syllables
+	 * @return ((chr - 0xAC00) / 28) % 21, the vowel index
+	 */
 	inline int extractVowel(char16_t chr)
 	{
 		return ((chr - 0xAC00) / 28) % 21;
 	}

+	/**
+	 * @brief Checks whether a character is an Old Hangul onset (initial consonant) jamo.
+	 * @param chr Character to test
+	 * @return true if chr is in [0x1100, 0x1160) or [0xA960, 0xA980); the first range also contains the 19 code points starting at 0x1100 that isHangulOnset accepts
+	 */
 	inline bool isOldHangulOnset(char16_t chr)
 	{
 		return within(chr, 0x1100, 0x1160) || within(chr, 0xA960, 0xA980);
 	}

+	/**
+	 * @brief 문자가 옛한글 모음인지 확인합니다.
+	 * @param chr 확인할 문자
+	 * @return 옛한글 모음이면 true
+	 */
 	inline bool isOldHangulVowel(char16_t chr)
 	{
 		return within(chr, 0x1160, 0x11A8) || within(chr, 0xD7B0, 0xD7CB);
--- a/include/kiwi/WordDetector.h
+++ b/include/kiwi/WordDetector.h
@ -1,15 +1,33 @@
-#pragma once
+/**
+ * @file WordDetector.h
+ * @author bab2min (bab2min@gmail.com)
+ * @brief 텍스트에서 미등록 단어를 추출하는 WordDetector 클래스 정의
+ * @version 0.22.1
+ * @date 2025-11-21
+ * 
+ * 텍스트 말뭉치에서 통계적 방법을 사용하여 사전에 없는 새로운 단어를 발견합니다.
+ * 응집도(cohesion)와 분기 엔트로피(branching entropy)를 활용한 단어 추출 알고리즘을 구현합니다.
+ */
+
+#pragma once

 #include <kiwi/Types.h>

 namespace kiwi
 {
+	/**
+	 * @brief 추출된 단어의 정보를 담는 구조체
+	 */
 	struct WordInfo
 	{
-		std::u16string form;
-		float score, lBranch, rBranch, lCohesion, rCohesion;
-		uint32_t freq;
-		std::map<POSTag, float> posScore;
+		std::u16string form;          /**< 단어의 표면형 */
+		float score;                  /**< 단어 점수 */
+		float lBranch;                /**< 좌측 분기 엔트로피 */
+		float rBranch;                /**< 우측 분기 엔트로피 */
+		float lCohesion;              /**< 좌측 응집도 */
+		float rCohesion;              /**< 우측 응집도 */
+		uint32_t freq;                /**< 출현 빈도 */
+		std::map<POSTag, float> posScore; /**< 품사별 점수 */

 		WordInfo(std::u16string _form = {},
 			float _score = 0, float _lBranch = 0, float _rBranch = 0,
@ -20,6 +38,12 @@ namespace kiwi
 		{}
 	};

+	/**
+	 * @brief 텍스트 말뭉치로부터 미등록 단어를 추출하는 클래스
+	 * 
+	 * 통계적 방법을 사용하여 텍스트에서 의미 있는 단어를 자동으로 발견합니다.
+	 * 응집도와 분기 엔트로피 등의 지표를 계산하여 단어 후보를 평가합니다.
+	 */
 	class WordDetector
 	{
 		struct Counter;
@ -38,20 +62,59 @@ namespace kiwi
 		std::map<POSTag, float> getPosScore(Counter&, const std::map<std::u16string, uint32_t>& cnt, std::map<std::u16string, uint32_t>::iterator it, bool coda, const std::u16string& realForm) const;
 	public:

+		/**
+		 * @brief Empty tag type used to request construction of the model from raw data.
+		 */
 		struct FromRawData {};
 		static constexpr FromRawData fromRawDataTag = {};

 		WordDetector() = default;
+		
+		/**
+		 * @brief Constructs a WordDetector by loading a pre-trained model.
+		 * @param modelPath Path to the model file
+		 * @param _numThreads Number of threads to use; the default -1 converts to the largest size_t value, which the original comment describes as "automatic" (confirm against the implementation)
+		 */
 		WordDetector(const std::string& modelPath, size_t _numThreads = -1);
+		
+		/**
+		 * @brief Constructs a WordDetector from raw data.
+		 * @note The leading FromRawData argument is unnamed and carries no data; it only selects this overload (pass fromRawDataTag).
+		 * @param modelPath Path to the raw data
+		 * @param _numThreads Number of threads to use; the default -1 converts to the largest size_t value, which the original comment describes as "automatic" (confirm against the implementation)
+		 */
 		WordDetector(FromRawData, const std::string& modelPath, size_t _numThreads = -1);
+		
+		/**
+		 * @brief Constructs a WordDetector using a stream provider.
+		 * @param streamProvider Callable that takes a std::string and returns a std::unique_ptr<std::istream>
+		 * @param _numThreads Number of threads to use; the default -1 converts to the largest size_t value, which the original comment describes as "automatic" (confirm against the implementation)
+		 */
 		WordDetector(const std::function<std::unique_ptr<std::istream>(const std::string&)>& streamProvider, size_t _numThreads = -1);

+		/**
+		 * @brief Reports whether this WordDetector is ready for use.
+		 * @return true if the posScore member is non-empty (the original comment describes this as the model being loaded and usable)
+		 */
 		bool ready() const
 		{
 			return !posScore.empty();
 		}

+		/**
+		 * @brief Saves the trained model to a file.
+		 * @param modelPath Path of the model file to write
+		 */
 		void saveModel(const std::string& modelPath) const;
+		
+		/**
+		 * @brief Extracts words from text.
+		 * @param reader Reader that supplies the text data
+		 * @param minCnt Minimum occurrence count (default 10)
+		 * @param maxWordLen Maximum word length (default 10)
+		 * @param minScore Minimum word score (default 0.1f)
+		 * @return Vector of WordInfo entries for the extracted words
+		 */
 		std::vector<WordInfo> extractWords(const U16MultipleReader& reader, size_t minCnt = 10, size_t maxWordLen = 10, float minScore = 0.1f) const;
 	};

--- a/third_party/cpp-btree
+++ b/third_party/cpp-btree
@ -1 +1 @@
-Subproject commit 226ce3aed24702bef1b03dba4b3cb55bc0bf31dd
+Subproject commit f38e229e754f90fa06b0a99ae7fbbcfcbe7dcabc
--- a/third_party/cpuinfo
+++ b/third_party/cpuinfo
@ -1 +1 @@
-Subproject commit 05dd959fa26c7e68fa229495a35f55e06a3b9655
+Subproject commit c4b4f4bf08c0cf486fc3111d0244ebf2a48ad01b
--- a/third_party/googletest
+++ b/third_party/googletest
@ -1 +1 @@
-Subproject commit 52eb8108c5bdec04579160ae17225d66034bd723
+Subproject commit ff6133ab49b364a883a55ba75c39e520fea6245b
--- a/third_party/json
+++ b/third_party/json
@ -1 +1 @@
-Subproject commit 55f93686c01528224f448c19128836e7df245f72
+Subproject commit 5ed07097faa6c50199c4a3b66e5ed37d4fbfccc2
--- a/third_party/mimalloc
+++ b/third_party/mimalloc
@ -1 +1 @@
-Subproject commit fbd8b99c2b828428947d70fdc046bb55609be93e
+Subproject commit f0cd5505aa102cee991be0367b82506638a16281
Author	SHA1	Message	Date
copilot-swe-agent[bot]	4961562a30	Fix spacing/formatting issues in documentation comments Co-authored-by: bab2min <19266222+bab2min@users.noreply.github.com>	2026-01-15 06:14:29 +00:00
copilot-swe-agent[bot]	63c346e69a	Add Doxygen documentation to PatternMatcher, TagUtils, and Mmap Co-authored-by: bab2min <19266222+bab2min@users.noreply.github.com>	2026-01-15 06:10:54 +00:00
copilot-swe-agent[bot]	6c80208796	Add Doxygen documentation to SkipBigramModel, CoNgramModel, ThreadPool, and Utils Co-authored-by: bab2min <19266222+bab2min@users.noreply.github.com>	2026-01-15 06:08:58 +00:00
copilot-swe-agent[bot]	3b4bc20d54	Add Doxygen documentation to LangModel, Joiner, Knlm, WordDetector, and FrozenTrie Co-authored-by: bab2min <19266222+bab2min@users.noreply.github.com>	2026-01-15 06:05:47 +00:00
copilot-swe-agent[bot]	062ba419c6	Initial plan	2026-01-15 05:57:54 +00:00