Bundle korean-spell-check script inside skill directory for packageless installs

The Python helper lived only in the repo-root scripts/ folder, so `skills add` never shipped it. Move the real implementation into korean-spell-check/scripts/ (mirroring joseon-sillok-search) and replace the root copy with a thin re-export wrapper so lint/test still resolve from the repo root.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-06-24 02:04:11 +00:00 · 2026-04-06 15:11:24 +09:00 · 2026-04-06 15:11:24 +09:00 · 50e4bdd769
commit 50e4bdd769
parent f043b5724b
3 changed files with 530 additions and 517 deletions
--- a/korean-spell-check/SKILL.md
+++ b/korean-spell-check/SKILL.md
@ -43,7 +43,7 @@ metadata:

 - 인터넷 연결
 - `python3` 3.10+
- 이 저장소의 `scripts/korean_spell_check.py`
+- 이 스킬 디렉토리의 `scripts/korean_spell_check.py` (설치 시 자동 포함)

 ## Verified surface notes

--- a/korean-spell-check/scripts/korean_spell_check.py
+++ b/korean-spell-check/scripts/korean_spell_check.py
@ -0,0 +1,523 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from dataclasses import asdict, dataclass
+from html import unescape
+from pathlib import Path
+from typing import Callable
+
+DEFAULT_RESULTS_URL = "https://nara-speller.co.kr/old_speller/results"
+DEFAULT_MAX_CHARS = 1500
+DEFAULT_TIMEOUT = 30
+DEFAULT_THROTTLE_SECONDS = 1.2
+RESULT_PAYLOAD_PATTERN = re.compile(r"data\s*=\s*(\[[\s\S]*?\]);\s*pageIdx\s*=")
+NO_ISSUES_PATTERN = re.compile(r"맞춤법과\s*문법\s*오류를\s*찾지\s*못했습니다", re.MULTILINE)
+TAG_PATTERN = re.compile(r"<[^>]+>")
+LINE_BREAK_PATTERN = re.compile(r"<br\s*/?>", re.IGNORECASE)
+SENTENCE_BOUNDARY_PATTERN = re.compile(r"(?<=[.!?。！？])\s+")
+PARAGRAPH_SEPARATOR_PATTERN = re.compile(r"\n(?:[ \t]*\n)+")
+
+DEFAULT_HEADERS = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "ko,en-US;q=0.9,en;q=0.8",
+    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+    "Origin": "https://nara-speller.co.kr",
+    "Referer": "https://nara-speller.co.kr/old_speller/",
+    "User-Agent": (
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
+    ),
+}
+
+
+@dataclass(frozen=True)
+class SpellCheckIssue:
+    chunk_index: int
+    page_index: int
+    issue_index: int
+    sentence: str
+    original: str
+    suggestions: list[str]
+    reason: str
+    start: int | None
+    end: int | None
+    correct_method: int | None
+    error_message: str
+
+
+def strip_html(value: str | None) -> str:
+    text = LINE_BREAK_PATTERN.sub("\n", value or "")
+    text = TAG_PATTERN.sub("", text)
+    return unescape(text).strip()
+
+
+def split_candidates(value: str | None) -> list[str]:
+    return [candidate.strip() for candidate in str(value or "").split("|") if candidate.strip()]
+
+
+def parse_positive_int(raw_value: str) -> int:
+    value = int(raw_value)
+    if value <= 0:
+        raise argparse.ArgumentTypeError("must be a positive integer")
+    return value
+
+
+def split_text_into_chunks(text: str, max_chars: int = DEFAULT_MAX_CHARS) -> list[str]:
+    original = str(text or "")
+    if not original.strip():
+        return []
+
+    units = split_paragraph_units(original)
+    chunks: list[str] = []
+    current = ""
+
+    for unit in units:
+        candidate = unit if not current else f"{current}{unit}"
+
+        if len(candidate) <= max_chars:
+            current = candidate
+            continue
+
+        if current:
+            chunks.append(current)
+            current = ""
+
+        if len(unit) <= max_chars:
+            current = unit
+            continue
+
+        separator = ""
+        body = unit
+        separator_match = PARAGRAPH_SEPARATOR_PATTERN.search(unit)
+
+        if separator_match and separator_match.end() == len(unit):
+            separator = separator_match.group(0)
+            body = unit[: separator_match.start()]
+
+        for sentence in split_long_paragraph(body, max_chars=max_chars):
+            if len(sentence) <= max_chars:
+                chunks.append(sentence)
+                continue
+
+            start = 0
+            while start < len(sentence):
+                chunks.append(sentence[start : start + max_chars])
+                start += max_chars
+
+        if separator:
+            if chunks and len(chunks[-1]) + len(separator) <= max_chars:
+                chunks[-1] += separator
+            else:
+                current = separator
+
+    if current:
+        chunks.append(current)
+
+    return chunks
+
+
+def split_paragraph_units(text: str) -> list[str]:
+    units: list[str] = []
+    start = 0
+
+    for match in PARAGRAPH_SEPARATOR_PATTERN.finditer(text):
+        paragraph = text[start : match.start()]
+        separator = match.group(0)
+
+        if paragraph:
+            units.append(paragraph + separator)
+        elif units:
+            units[-1] += separator
+        else:
+            units.append(separator)
+
+        start = match.end()
+
+    tail = text[start:]
+    if tail:
+        units.append(tail)
+
+    return units
+
+
+def split_long_paragraph(paragraph: str, *, max_chars: int) -> list[str]:
+    sentence_boundaries = list(SENTENCE_BOUNDARY_PATTERN.finditer(paragraph))
+
+    if not sentence_boundaries:
+        return [paragraph]
+
+    sentences: list[str] = []
+    start = 0
+
+    for boundary in sentence_boundaries:
+        sentences.append(paragraph[start : boundary.end()])
+        start = boundary.end()
+
+    if start < len(paragraph):
+        sentences.append(paragraph[start:])
+
+    groups: list[str] = []
+    current = ""
+
+    for sentence in sentences:
+        candidate = sentence if not current else f"{current}{sentence}"
+
+        if len(candidate) <= max_chars:
+            current = candidate
+            continue
+
+        if current:
+            groups.append(current)
+        current = sentence
+
+    if current:
+        groups.append(current)
+
+    return groups
+
+
+def fetch_spell_check_html(
+    text: str,
+    *,
+    strong_rules: bool = True,
+    timeout: int = DEFAULT_TIMEOUT,
+    url: str = DEFAULT_RESULTS_URL,
+) -> str:
+    body = {
+        "text1": text,
+        "chkKey": "",
+    }
+
+    if strong_rules:
+        body["btnModeChange"] = "on"
+
+    request = urllib.request.Request(
+        url,
+        data=urllib.parse.urlencode(body).encode("utf-8"),
+        headers=DEFAULT_HEADERS,
+        method="POST",
+    )
+
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            return response.read().decode("utf-8", "ignore")
+    except urllib.error.HTTPError as error:  # type: ignore[attr-defined]
+        if error.code == 403:
+            raise RuntimeError(
+                "The spell-check service returned HTTP 403. "
+                "This environment may be hitting a Cloudflare/browser challenge. "
+                "Retry later with lower request volume or from a browser-friendly network."
+            ) from error
+
+        raise RuntimeError(f"The spell-check service returned HTTP {error.code}.") from error
+
+
+def extract_result_payload(html: str) -> list[dict]:
+    match = RESULT_PAYLOAD_PATTERN.search(html)
+
+    if not match:
+        if NO_ISSUES_PATTERN.search(html):
+            return []
+        raise ValueError("Unable to find the spell-check payload in the returned HTML.")
+
+    payload = json.loads(match.group(1))
+
+    if not isinstance(payload, list):
+        raise ValueError("The extracted spell-check payload was not a list.")
+
+    return payload
+
+
+def apply_page_corrections(page: dict) -> str:
+    source = str(page.get("str", ""))
+    corrected = source
+
+    for error in sorted(page.get("errInfo", []), key=lambda item: int(item.get("start", -1)), reverse=True):
+        suggestions = split_candidates(error.get("candWord"))
+        original = str(error.get("orgStr", ""))
+
+        if not suggestions:
+            continue
+
+        start = int(error.get("start", -1))
+        end = int(error.get("end", -1))
+
+        if start < 0 or end < start or end >= len(source):
+            continue
+
+        slice_end = end + 1
+        if original:
+            while slice_end > start and source[start:slice_end] != original and source[start : slice_end - 1] == original:
+                slice_end -= 1
+
+        corrected = f"{corrected[:start]}{suggestions[0]}{corrected[slice_end:]}"
+
+    return corrected
+
+
+def build_visible_text_index(text: str) -> tuple[str, list[int], list[int | None]]:
+    visible_chars: list[str] = []
+    visible_indices: list[int] = []
+    visible_lookup: list[int | None] = []
+
+    for index, char in enumerate(text):
+        if char.isspace():
+            visible_lookup.append(None)
+            continue
+
+        visible_lookup.append(len(visible_indices))
+        visible_chars.append(char)
+        visible_indices.append(index)
+
+    return "".join(visible_chars), visible_indices, visible_lookup
+
+
+def preserve_original_layout(original: str, suggestion: str) -> str:
+    if "\n" not in original:
+        return suggestion
+
+    original_visible, original_visible_indices, _ = build_visible_text_index(original)
+    suggestion_visible, suggestion_visible_indices, _ = build_visible_text_index(suggestion)
+
+    if original_visible != suggestion_visible:
+        return suggestion
+
+    if not original_visible_indices or not suggestion_visible_indices:
+        return original if original.strip() else suggestion
+
+    merged: list[str] = []
+    leading_original = original[: original_visible_indices[0]]
+    leading_suggestion = suggestion[: suggestion_visible_indices[0]]
+    merged.append(leading_original if leading_original.isspace() else leading_suggestion)
+
+    for ordinal, suggestion_index in enumerate(suggestion_visible_indices):
+        merged.append(suggestion[suggestion_index])
+
+        next_original_index = original_visible_indices[ordinal + 1] if ordinal + 1 < len(original_visible_indices) else None
+        next_suggestion_index = (
+            suggestion_visible_indices[ordinal + 1] if ordinal + 1 < len(suggestion_visible_indices) else None
+        )
+
+        original_gap = (
+            original[original_visible_indices[ordinal] + 1 : next_original_index]
+            if next_original_index is not None
+            else original[original_visible_indices[ordinal] + 1 :]
+        )
+        suggestion_gap = (
+            suggestion[suggestion_index + 1 : next_suggestion_index]
+            if next_suggestion_index is not None
+            else suggestion[suggestion_index + 1 :]
+        )
+
+        merged.append(original_gap if "\n" in original_gap else suggestion_gap)
+
+    return "".join(merged)
+
+
+def apply_chunk_corrections(chunk: str, pages: list[dict]) -> str:
+    combined_source = "".join(str(page.get("str", "")) for page in pages)
+    fallback = "".join(apply_page_corrections(page) for page in pages) or chunk
+
+    if not combined_source:
+        return fallback
+
+    chunk_visible, chunk_visible_indices, _ = build_visible_text_index(chunk)
+    source_visible, _, source_visible_lookup = build_visible_text_index(combined_source)
+
+    if chunk_visible != source_visible:
+        return fallback
+
+    replacements: list[tuple[int, int, str, str]] = []
+    page_offset = 0
+
+    for page in pages:
+        for error in page.get("errInfo", []):
+            suggestions = split_candidates(error.get("candWord"))
+            if not suggestions:
+                continue
+
+            start = int(error.get("start", -1))
+            end = int(error.get("end", -1))
+
+            if start < 0 or end < start:
+                continue
+
+            start += page_offset
+            end += page_offset
+
+            visible_ordinals = [
+                source_visible_lookup[index]
+                for index in range(start, min(end + 1, len(source_visible_lookup)))
+                if source_visible_lookup[index] is not None
+            ]
+
+            if not visible_ordinals:
+                continue
+
+            original_start = chunk_visible_indices[visible_ordinals[0]]
+            original_end = chunk_visible_indices[visible_ordinals[-1]]
+            replacements.append((original_start, original_end, suggestions[0], str(error.get("orgStr", ""))))
+
+        page_offset += len(str(page.get("str", "")))
+
+    if not replacements:
+        return chunk
+
+    corrected = chunk
+
+    for start, end, suggestion, original in sorted(replacements, key=lambda item: item[0], reverse=True):
+        slice_end = end + 1
+        if original:
+            while (
+                slice_end > start
+                and corrected[start:slice_end] != original
+                and corrected[start : slice_end - 1] == original
+            ):
+                slice_end -= 1
+
+        original_slice = corrected[start:slice_end]
+        replacement = preserve_original_layout(original_slice, suggestion)
+        corrected = f"{corrected[:start]}{replacement}{corrected[slice_end:]}"
+
+    return corrected
+
+
+def build_issue(chunk_index: int, page_index: int, issue_index: int, page: dict, error: dict) -> SpellCheckIssue:
+    return SpellCheckIssue(
+        chunk_index=chunk_index,
+        page_index=page_index,
+        issue_index=issue_index,
+        sentence=str(page.get("str", "")),
+        original=str(error.get("orgStr", "")),
+        suggestions=split_candidates(error.get("candWord")),
+        reason=strip_html(error.get("help")) or strip_html(error.get("errMsg")),
+        start=int(error["start"]) if str(error.get("start", "")).strip() else None,
+        end=int(error["end"]) if str(error.get("end", "")).strip() else None,
+        correct_method=int(error["correctMethod"])
+        if str(error.get("correctMethod", "")).strip()
+        else None,
+        error_message=strip_html(error.get("errMsg")),
+    )
+
+
+def check_text(
+    text: str,
+    *,
+    max_chars: int = DEFAULT_MAX_CHARS,
+    strong_rules: bool = True,
+    timeout: int = DEFAULT_TIMEOUT,
+    throttle_seconds: float = DEFAULT_THROTTLE_SECONDS,
+    requester: Callable[..., str] = fetch_spell_check_html,
+    sleep_fn: Callable[[float], None] = time.sleep,
+) -> dict:
+    chunks = split_text_into_chunks(text, max_chars=max_chars)
+    corrected_chunks: list[str] = []
+    issues: list[SpellCheckIssue] = []
+    chunk_reports: list[dict] = []
+
+    for chunk_index, chunk in enumerate(chunks):
+        if chunk_index > 0 and throttle_seconds > 0:
+            sleep_fn(throttle_seconds)
+
+        html = requester(chunk, strong_rules=strong_rules, timeout=timeout)
+        pages = extract_result_payload(html)
+        corrected_chunk = apply_chunk_corrections(chunk, pages)
+
+        corrected_chunks.append(corrected_chunk)
+        chunk_reports.append(
+            {
+                "chunk_index": chunk_index,
+                "original_text": chunk,
+                "corrected_text": corrected_chunk,
+                "page_count": len(pages),
+            }
+        )
+
+        for page_index, page in enumerate(pages):
+            for issue_index, error in enumerate(page.get("errInfo", [])):
+                issues.append(build_issue(chunk_index, page_index, issue_index, page, error))
+
+    return {
+        "original_text": str(text or ""),
+        "corrected_text": "".join(corrected_chunks),
+        "chunks": chunk_reports,
+        "issues": issues,
+        "meta": {
+            "chunk_count": len(chunks),
+            "strong_rules": strong_rules,
+            "max_chars": max_chars,
+        },
+    }
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run the official Nara/PNU Korean spell checker.")
+    parser.add_argument("--text", help="Inline Korean text to inspect.")
+    parser.add_argument("--file", help="UTF-8 text/markdown file to inspect.")
+    parser.add_argument("--max-chars", type=parse_positive_int, default=DEFAULT_MAX_CHARS)
+    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT)
+    parser.add_argument("--throttle-seconds", type=float, default=DEFAULT_THROTTLE_SECONDS)
+    parser.add_argument("--weak-rules", action="store_true", help="Disable the strong-rules checkbox.")
+    parser.add_argument("--format", choices=["json", "text"], default="json")
+    args = parser.parse_args(argv)
+
+    if not args.text and not args.file:
+        parser.error("Either --text or --file is required.")
+
+    return args
+
+
+def load_input(args: argparse.Namespace) -> str:
+    if args.text:
+        return args.text
+
+    return Path(args.file).read_text(encoding="utf-8")
+
+
+def serialize_report(report: dict) -> dict:
+    return {
+        **report,
+        "issues": [asdict(issue) for issue in report["issues"]],
+    }
+
+
+def print_text_report(report: dict) -> None:
+    print("# corrected_text")
+    print(report["corrected_text"])
+    print()
+    print("# issues")
+
+    for issue in report["issues"]:
+        print(f"- chunk={issue.chunk_index} page={issue.page_index} issue={issue.issue_index}")
+        print(f"  original: {issue.original}")
+        print(f"  suggestions: {', '.join(issue.suggestions) if issue.suggestions else '(없음)'}")
+        print(f"  reason: {issue.reason or '(없음)'}")
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = parse_args(argv or sys.argv[1:])
+    report = check_text(
+        load_input(args),
+        max_chars=args.max_chars,
+        strong_rules=not args.weak_rules,
+        timeout=args.timeout,
+        throttle_seconds=args.throttle_seconds,
+    )
+
+    if args.format == "json":
+        print(json.dumps(serialize_report(report), ensure_ascii=False, indent=2))
+    else:
+        print_text_report(report)
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/scripts/korean_spell_check.py
+++ b/scripts/korean_spell_check.py
@ -1,523 +1,13 @@
 from __future__ import annotations

-import argparse
-import json
-import re
-import sys
-import time
-import urllib.error
-import urllib.parse
-import urllib.request
-from dataclasses import asdict, dataclass
-from html import unescape
 from pathlib import Path
-from typing import Callable

-DEFAULT_RESULTS_URL = "https://nara-speller.co.kr/old_speller/results"
-DEFAULT_MAX_CHARS = 1500
-DEFAULT_TIMEOUT = 30
-DEFAULT_THROTTLE_SECONDS = 1.2
-RESULT_PAYLOAD_PATTERN = re.compile(r"data\s*=\s*(\[[\s\S]*?\]);\s*pageIdx\s*=")
-NO_ISSUES_PATTERN = re.compile(r"맞춤법과\s*문법\s*오류를\s*찾지\s*못했습니다", re.MULTILINE)
-TAG_PATTERN = re.compile(r"<[^>]+>")
-LINE_BREAK_PATTERN = re.compile(r"<br\s*/?>", re.IGNORECASE)
-SENTENCE_BOUNDARY_PATTERN = re.compile(r"(?<=[.!?。！？])\s+")
-PARAGRAPH_SEPARATOR_PATTERN = re.compile(r"\n(?:[ \t]*\n)+")

-DEFAULT_HEADERS = {
-    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-    "Accept-Language": "ko,en-US;q=0.9,en;q=0.8",
-    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
-    "Origin": "https://nara-speller.co.kr",
-    "Referer": "https://nara-speller.co.kr/old_speller/",
-    "User-Agent": (
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
-    ),
-}
+_BUNDLED_HELPER = (
+    Path(__file__).resolve().parent.parent / "korean-spell-check" / "scripts" / "korean_spell_check.py"
+)

+if not _BUNDLED_HELPER.exists():  # pragma: no cover - defensive import guard
+    raise FileNotFoundError(f"Bundled spell-check helper not found: {_BUNDLED_HELPER}")

-@dataclass(frozen=True)
-class SpellCheckIssue:
-    chunk_index: int
-    page_index: int
-    issue_index: int
-    sentence: str
-    original: str
-    suggestions: list[str]
-    reason: str
-    start: int | None
-    end: int | None
-    correct_method: int | None
-    error_message: str
-
-
-def strip_html(value: str | None) -> str:
-    text = LINE_BREAK_PATTERN.sub("\n", value or "")
-    text = TAG_PATTERN.sub("", text)
-    return unescape(text).strip()
-
-
-def split_candidates(value: str | None) -> list[str]:
-    return [candidate.strip() for candidate in str(value or "").split("|") if candidate.strip()]
-
-
-def parse_positive_int(raw_value: str) -> int:
-    value = int(raw_value)
-    if value <= 0:
-        raise argparse.ArgumentTypeError("must be a positive integer")
-    return value
-
-
-def split_text_into_chunks(text: str, max_chars: int = DEFAULT_MAX_CHARS) -> list[str]:
-    original = str(text or "")
-    if not original.strip():
-        return []
-
-    units = split_paragraph_units(original)
-    chunks: list[str] = []
-    current = ""
-
-    for unit in units:
-        candidate = unit if not current else f"{current}{unit}"
-
-        if len(candidate) <= max_chars:
-            current = candidate
-            continue
-
-        if current:
-            chunks.append(current)
-            current = ""
-
-        if len(unit) <= max_chars:
-            current = unit
-            continue
-
-        separator = ""
-        body = unit
-        separator_match = PARAGRAPH_SEPARATOR_PATTERN.search(unit)
-
-        if separator_match and separator_match.end() == len(unit):
-            separator = separator_match.group(0)
-            body = unit[: separator_match.start()]
-
-        for sentence in split_long_paragraph(body, max_chars=max_chars):
-            if len(sentence) <= max_chars:
-                chunks.append(sentence)
-                continue
-
-            start = 0
-            while start < len(sentence):
-                chunks.append(sentence[start : start + max_chars])
-                start += max_chars
-
-        if separator:
-            if chunks and len(chunks[-1]) + len(separator) <= max_chars:
-                chunks[-1] += separator
-            else:
-                current = separator
-
-    if current:
-        chunks.append(current)
-
-    return chunks
-
-
-def split_paragraph_units(text: str) -> list[str]:
-    units: list[str] = []
-    start = 0
-
-    for match in PARAGRAPH_SEPARATOR_PATTERN.finditer(text):
-        paragraph = text[start : match.start()]
-        separator = match.group(0)
-
-        if paragraph:
-            units.append(paragraph + separator)
-        elif units:
-            units[-1] += separator
-        else:
-            units.append(separator)
-
-        start = match.end()
-
-    tail = text[start:]
-    if tail:
-        units.append(tail)
-
-    return units
-
-
-def split_long_paragraph(paragraph: str, *, max_chars: int) -> list[str]:
-    sentence_boundaries = list(SENTENCE_BOUNDARY_PATTERN.finditer(paragraph))
-
-    if not sentence_boundaries:
-        return [paragraph]
-
-    sentences: list[str] = []
-    start = 0
-
-    for boundary in sentence_boundaries:
-        sentences.append(paragraph[start : boundary.end()])
-        start = boundary.end()
-
-    if start < len(paragraph):
-        sentences.append(paragraph[start:])
-
-    groups: list[str] = []
-    current = ""
-
-    for sentence in sentences:
-        candidate = sentence if not current else f"{current}{sentence}"
-
-        if len(candidate) <= max_chars:
-            current = candidate
-            continue
-
-        if current:
-            groups.append(current)
-        current = sentence
-
-    if current:
-        groups.append(current)
-
-    return groups
-
-
-def fetch_spell_check_html(
-    text: str,
-    *,
-    strong_rules: bool = True,
-    timeout: int = DEFAULT_TIMEOUT,
-    url: str = DEFAULT_RESULTS_URL,
-) -> str:
-    body = {
-        "text1": text,
-        "chkKey": "",
-    }
-
-    if strong_rules:
-        body["btnModeChange"] = "on"
-
-    request = urllib.request.Request(
-        url,
-        data=urllib.parse.urlencode(body).encode("utf-8"),
-        headers=DEFAULT_HEADERS,
-        method="POST",
-    )
-
-    try:
-        with urllib.request.urlopen(request, timeout=timeout) as response:
-            return response.read().decode("utf-8", "ignore")
-    except urllib.error.HTTPError as error:  # type: ignore[attr-defined]
-        if error.code == 403:
-            raise RuntimeError(
-                "The spell-check service returned HTTP 403. "
-                "This environment may be hitting a Cloudflare/browser challenge. "
-                "Retry later with lower request volume or from a browser-friendly network."
-            ) from error
-
-        raise RuntimeError(f"The spell-check service returned HTTP {error.code}.") from error
-
-
-def extract_result_payload(html: str) -> list[dict]:
-    match = RESULT_PAYLOAD_PATTERN.search(html)
-
-    if not match:
-        if NO_ISSUES_PATTERN.search(html):
-            return []
-        raise ValueError("Unable to find the spell-check payload in the returned HTML.")
-
-    payload = json.loads(match.group(1))
-
-    if not isinstance(payload, list):
-        raise ValueError("The extracted spell-check payload was not a list.")
-
-    return payload
-
-
-def apply_page_corrections(page: dict) -> str:
-    source = str(page.get("str", ""))
-    corrected = source
-
-    for error in sorted(page.get("errInfo", []), key=lambda item: int(item.get("start", -1)), reverse=True):
-        suggestions = split_candidates(error.get("candWord"))
-        original = str(error.get("orgStr", ""))
-
-        if not suggestions:
-            continue
-
-        start = int(error.get("start", -1))
-        end = int(error.get("end", -1))
-
-        if start < 0 or end < start or end >= len(source):
-            continue
-
-        slice_end = end + 1
-        if original:
-            while slice_end > start and source[start:slice_end] != original and source[start : slice_end - 1] == original:
-                slice_end -= 1
-
-        corrected = f"{corrected[:start]}{suggestions[0]}{corrected[slice_end:]}"
-
-    return corrected
-
-
-def build_visible_text_index(text: str) -> tuple[str, list[int], list[int | None]]:
-    visible_chars: list[str] = []
-    visible_indices: list[int] = []
-    visible_lookup: list[int | None] = []
-
-    for index, char in enumerate(text):
-        if char.isspace():
-            visible_lookup.append(None)
-            continue
-
-        visible_lookup.append(len(visible_indices))
-        visible_chars.append(char)
-        visible_indices.append(index)
-
-    return "".join(visible_chars), visible_indices, visible_lookup
-
-
-def preserve_original_layout(original: str, suggestion: str) -> str:
-    if "\n" not in original:
-        return suggestion
-
-    original_visible, original_visible_indices, _ = build_visible_text_index(original)
-    suggestion_visible, suggestion_visible_indices, _ = build_visible_text_index(suggestion)
-
-    if original_visible != suggestion_visible:
-        return suggestion
-
-    if not original_visible_indices or not suggestion_visible_indices:
-        return original if original.strip() else suggestion
-
-    merged: list[str] = []
-    leading_original = original[: original_visible_indices[0]]
-    leading_suggestion = suggestion[: suggestion_visible_indices[0]]
-    merged.append(leading_original if leading_original.isspace() else leading_suggestion)
-
-    for ordinal, suggestion_index in enumerate(suggestion_visible_indices):
-        merged.append(suggestion[suggestion_index])
-
-        next_original_index = original_visible_indices[ordinal + 1] if ordinal + 1 < len(original_visible_indices) else None
-        next_suggestion_index = (
-            suggestion_visible_indices[ordinal + 1] if ordinal + 1 < len(suggestion_visible_indices) else None
-        )
-
-        original_gap = (
-            original[original_visible_indices[ordinal] + 1 : next_original_index]
-            if next_original_index is not None
-            else original[original_visible_indices[ordinal] + 1 :]
-        )
-        suggestion_gap = (
-            suggestion[suggestion_index + 1 : next_suggestion_index]
-            if next_suggestion_index is not None
-            else suggestion[suggestion_index + 1 :]
-        )
-
-        merged.append(original_gap if "\n" in original_gap else suggestion_gap)
-
-    return "".join(merged)
-
-
-def apply_chunk_corrections(chunk: str, pages: list[dict]) -> str:
-    combined_source = "".join(str(page.get("str", "")) for page in pages)
-    fallback = "".join(apply_page_corrections(page) for page in pages) or chunk
-
-    if not combined_source:
-        return fallback
-
-    chunk_visible, chunk_visible_indices, _ = build_visible_text_index(chunk)
-    source_visible, _, source_visible_lookup = build_visible_text_index(combined_source)
-
-    if chunk_visible != source_visible:
-        return fallback
-
-    replacements: list[tuple[int, int, str, str]] = []
-    page_offset = 0
-
-    for page in pages:
-        for error in page.get("errInfo", []):
-            suggestions = split_candidates(error.get("candWord"))
-            if not suggestions:
-                continue
-
-            start = int(error.get("start", -1))
-            end = int(error.get("end", -1))
-
-            if start < 0 or end < start:
-                continue
-
-            start += page_offset
-            end += page_offset
-
-            visible_ordinals = [
-                source_visible_lookup[index]
-                for index in range(start, min(end + 1, len(source_visible_lookup)))
-                if source_visible_lookup[index] is not None
-            ]
-
-            if not visible_ordinals:
-                continue
-
-            original_start = chunk_visible_indices[visible_ordinals[0]]
-            original_end = chunk_visible_indices[visible_ordinals[-1]]
-            replacements.append((original_start, original_end, suggestions[0], str(error.get("orgStr", ""))))
-
-        page_offset += len(str(page.get("str", "")))
-
-    if not replacements:
-        return chunk
-
-    corrected = chunk
-
-    for start, end, suggestion, original in sorted(replacements, key=lambda item: item[0], reverse=True):
-        slice_end = end + 1
-        if original:
-            while (
-                slice_end > start
-                and corrected[start:slice_end] != original
-                and corrected[start : slice_end - 1] == original
-            ):
-                slice_end -= 1
-
-        original_slice = corrected[start:slice_end]
-        replacement = preserve_original_layout(original_slice, suggestion)
-        corrected = f"{corrected[:start]}{replacement}{corrected[slice_end:]}"
-
-    return corrected
-
-
-def build_issue(chunk_index: int, page_index: int, issue_index: int, page: dict, error: dict) -> SpellCheckIssue:
-    return SpellCheckIssue(
-        chunk_index=chunk_index,
-        page_index=page_index,
-        issue_index=issue_index,
-        sentence=str(page.get("str", "")),
-        original=str(error.get("orgStr", "")),
-        suggestions=split_candidates(error.get("candWord")),
-        reason=strip_html(error.get("help")) or strip_html(error.get("errMsg")),
-        start=int(error["start"]) if str(error.get("start", "")).strip() else None,
-        end=int(error["end"]) if str(error.get("end", "")).strip() else None,
-        correct_method=int(error["correctMethod"])
-        if str(error.get("correctMethod", "")).strip()
-        else None,
-        error_message=strip_html(error.get("errMsg")),
-    )
-
-
-def check_text(
-    text: str,
-    *,
-    max_chars: int = DEFAULT_MAX_CHARS,
-    strong_rules: bool = True,
-    timeout: int = DEFAULT_TIMEOUT,
-    throttle_seconds: float = DEFAULT_THROTTLE_SECONDS,
-    requester: Callable[..., str] = fetch_spell_check_html,
-    sleep_fn: Callable[[float], None] = time.sleep,
-) -> dict:
-    chunks = split_text_into_chunks(text, max_chars=max_chars)
-    corrected_chunks: list[str] = []
-    issues: list[SpellCheckIssue] = []
-    chunk_reports: list[dict] = []
-
-    for chunk_index, chunk in enumerate(chunks):
-        if chunk_index > 0 and throttle_seconds > 0:
-            sleep_fn(throttle_seconds)
-
-        html = requester(chunk, strong_rules=strong_rules, timeout=timeout)
-        pages = extract_result_payload(html)
-        corrected_chunk = apply_chunk_corrections(chunk, pages)
-
-        corrected_chunks.append(corrected_chunk)
-        chunk_reports.append(
-            {
-                "chunk_index": chunk_index,
-                "original_text": chunk,
-                "corrected_text": corrected_chunk,
-                "page_count": len(pages),
-            }
-        )
-
-        for page_index, page in enumerate(pages):
-            for issue_index, error in enumerate(page.get("errInfo", [])):
-                issues.append(build_issue(chunk_index, page_index, issue_index, page, error))
-
-    return {
-        "original_text": str(text or ""),
-        "corrected_text": "".join(corrected_chunks),
-        "chunks": chunk_reports,
-        "issues": issues,
-        "meta": {
-            "chunk_count": len(chunks),
-            "strong_rules": strong_rules,
-            "max_chars": max_chars,
-        },
-    }
-
-
-def parse_args(argv: list[str]) -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Run the official Nara/PNU Korean spell checker.")
-    parser.add_argument("--text", help="Inline Korean text to inspect.")
-    parser.add_argument("--file", help="UTF-8 text/markdown file to inspect.")
-    parser.add_argument("--max-chars", type=parse_positive_int, default=DEFAULT_MAX_CHARS)
-    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT)
-    parser.add_argument("--throttle-seconds", type=float, default=DEFAULT_THROTTLE_SECONDS)
-    parser.add_argument("--weak-rules", action="store_true", help="Disable the strong-rules checkbox.")
-    parser.add_argument("--format", choices=["json", "text"], default="json")
-    args = parser.parse_args(argv)
-
-    if not args.text and not args.file:
-        parser.error("Either --text or --file is required.")
-
-    return args
-
-
-def load_input(args: argparse.Namespace) -> str:
-    if args.text:
-        return args.text
-
-    return Path(args.file).read_text(encoding="utf-8")
-
-
-def serialize_report(report: dict) -> dict:
-    return {
-        **report,
-        "issues": [asdict(issue) for issue in report["issues"]],
-    }
-
-
-def print_text_report(report: dict) -> None:
-    print("# corrected_text")
-    print(report["corrected_text"])
-    print()
-    print("# issues")
-
-    for issue in report["issues"]:
-        print(f"- chunk={issue.chunk_index} page={issue.page_index} issue={issue.issue_index}")
-        print(f"  original: {issue.original}")
-        print(f"  suggestions: {', '.join(issue.suggestions) if issue.suggestions else '(없음)'}")
-        print(f"  reason: {issue.reason or '(없음)'}")
-
-
-def main(argv: list[str] | None = None) -> int:
-    args = parse_args(argv or sys.argv[1:])
-    report = check_text(
-        load_input(args),
-        max_chars=args.max_chars,
-        strong_rules=not args.weak_rules,
-        timeout=args.timeout,
-        throttle_seconds=args.throttle_seconds,
-    )
-
-    if args.format == "json":
-        print(json.dumps(serialize_report(report), ensure_ascii=False, indent=2))
-    else:
-        print_text_report(report)
-
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
+exec(compile(_BUNDLED_HELPER.read_text(encoding="utf-8"), str(_BUNDLED_HELPER), "exec"), globals())