mirror of
https://github.com/NomaDamas/k-skill.git
synced 2026-06-24 02:04:11 +00:00
Preserve documented Korean count semantics in edge-case inputs
The NEIS compatibility path was treating mark-only graphemes as Hangul, and the CLI accepted repeated input flags by silently overwriting earlier values. Tighten the Hangul branch to require an actual Hangul-script code point and reject any second input-source flag, then lock both fixes with helper and CLI regressions.

Constraint: Must preserve the published default and NEIS counting contracts without new dependencies
Rejected: Doc-only clarification | would leave the shipped behavior incorrect
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Keep the NEIS 3-byte branch limited to graphemes that actually contain Hangul script code points
Tested: node --test scripts/skill-docs.test.js scripts/test_korean_character_count.js; node scripts/korean_character_count.js --text '가나다' --format json; node scripts/korean_character_count.js --file <tmpfile> --profile neis --format text; printf '한\r\n나' | node scripts/korean_character_count.js --stdin --format json; mark-only NEIS and duplicate-input repros; npm test; npm run ci
Not-tested: repeated --stdin as a separate committed regression case
This commit is contained in:
parent
b2eedd827b
commit
488a9f129e
2 changed files with 22 additions and 3 deletions
|
|
@ -4,7 +4,8 @@
|
|||
const fs = require("node:fs");
|
||||
|
||||
const LINE_BREAK_PATTERN = /\r\n|[\n\r\u2028\u2029]/gu;
|
||||
const HANGUL_PATTERN = /^[\p{Script=Hangul}\p{Mark}]+$/u;
|
||||
const HANGUL_OR_MARK_PATTERN = /^[\p{Script=Hangul}\p{Mark}]+$/u;
|
||||
const HAS_HANGUL_PATTERN = /\p{Script=Hangul}/u;
|
||||
const WHITESPACE_ONLY_PATTERN = /^\s+$/u;
|
||||
const ASCII_ONLY_PATTERN = /^[\x00-\x7F]+$/;
|
||||
|
||||
|
|
@ -60,7 +61,7 @@ function countNeisGraphemeBytes(grapheme) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (HANGUL_PATTERN.test(grapheme)) {
|
||||
if (HANGUL_OR_MARK_PATTERN.test(grapheme) && HAS_HANGUL_PATTERN.test(grapheme)) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
|
|
@ -171,7 +172,7 @@ function parseArgs(argv, stdinIsTTY = process.stdin.isTTY) {
|
|||
}
|
||||
|
||||
function setInputMode(options, nextMode) {
|
||||
if (options.inputMode && options.inputMode !== nextMode) {
|
||||
if (options.inputMode) {
|
||||
throw new Error("Provide exactly one input source with --text, --file, or --stdin.");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,12 @@ test("countNeisBytes applies Hangul 3-byte, ASCII 1-byte, and newline 2-byte rul
|
|||
assert.equal(countNeisBytes("한글"), 6);
|
||||
});
|
||||
|
||||
test("countNeisBytes falls back to UTF-8 bytes for non-Hangul graphemes", () => {
|
||||
assert.equal(countUtf8Bytes("\u0301"), 2);
|
||||
assert.equal(countNeisBytes("\u0301"), 2);
|
||||
assert.equal(countNeisBytes("🙂"), countUtf8Bytes("🙂"));
|
||||
});
|
||||
|
||||
test("parseArgs enforces one input source and validates the profile", () => {
|
||||
assert.deepEqual(parseArgs(["--text", "가나다"]), {
|
||||
format: "json",
|
||||
|
|
@ -50,6 +56,7 @@ test("parseArgs enforces one input source and validates the profile", () => {
|
|||
});
|
||||
|
||||
assert.throws(() => parseArgs(["--text", "가", "--file", "sample.txt"]), /exactly one input source/i);
|
||||
assert.throws(() => parseArgs(["--text", "가", "--text", "나"]), /exactly one input source/i);
|
||||
assert.throws(() => parseArgs(["--profile", "legacy", "--text", "가"]), /unknown profile/i);
|
||||
});
|
||||
|
||||
|
|
@ -88,6 +95,17 @@ test("CLI accepts text, file, and stdin input", () => {
|
|||
);
|
||||
assert.equal(stdinOutput.profile, "neis");
|
||||
assert.equal(stdinOutput.counts.bytes, countNeisBytes("가나다\nABC"));
|
||||
|
||||
const duplicateText = childProcess.spawnSync(
|
||||
"node",
|
||||
["scripts/korean_character_count.js", "--text", "가나다", "--text", "라마바", "--format", "json"],
|
||||
{
|
||||
cwd: repoRoot,
|
||||
encoding: "utf8",
|
||||
},
|
||||
);
|
||||
assert.notEqual(duplicateText.status, 0);
|
||||
assert.match(duplicateText.stderr, /exactly one input source/i);
|
||||
} finally {
|
||||
fs.rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue