Enforce fail-closed candidate identity parsing

Constraint: PR #266 review required exact candidate-name matching and CLI help regression coverage.
Rejected: fallback-to-query-name on missing upstream markup | it can mislabel unrelated candidates as exact matches.
Confidence: high
Scope-risk: narrow
Directive: Keep NEC parser changes fail-closed when candidate identity cannot be parsed.
Tested: git diff --check; node --test packages/local-election-candidate-search/test/index.test.js; npm run lint --workspace local-election-candidate-search; npm run test --workspace local-election-candidate-search; npm pack --workspace local-election-candidate-search --dry-run; live CLI smoke for 오세훈; CLI --help smoke.
Not-tested: repo-wide npm run ci remains blocked by pre-existing missing SKILL.md: ohou-today-deal.
This commit is contained in:
Jeffrey (Dongkyu) Kim 2026-05-18 16:24:12 +09:00
commit 8bcd5fe7cf
3 changed files with 68 additions and 10 deletions

View file

@ -2,6 +2,10 @@
const { searchCandidates } = require("./index")
/**
 * CLI entry point.
 *
 * When `options.help` is truthy, usage is printed through `printHelp(io)` and
 * no search is performed. Otherwise the options are handed to
 * `searchCandidates` and the awaited result is written to `io.log` as JSON
 * indented with two spaces.
 *
 * @param {object} [options] - Parsed CLI options; defaults to
 *   `parseArgs(process.argv.slice(2))`.
 * @param {{ log: Function }} [io] - Output sink; defaults to `console`.
 * @returns {Promise<void>}
 */
async function main(options = parseArgs(process.argv.slice(2)), io = console) {
  if (!options.help) {
    const searchResult = await searchCandidates(options)
    const prettyJson = JSON.stringify(searchResult, null, 2)
    io.log(prettyJson)
    return
  }

  // Help requested: print usage only, never hit the search backend.
  printHelp(io)
}
@ -19,10 +23,8 @@ function parseArgs(argv) {
else if (arg === "--local-only") options.localOnly = true
else if (arg === "--include-html") options.includeHtml = true
else if (arg === "--fixture") options.fixture = argv[++i] || ""
else if (arg === "--help" || arg === "-h") {
printHelp(io)
process.exit(0)
} else if (!options.name) options.name = arg
else if (arg === "--help" || arg === "-h") options.help = true
else if (!options.name) options.name = arg
}
return options
}

View file

@ -210,6 +210,14 @@ function isUnexpectedHtml(html) {
return !/resultDiv|class=["']result|검색결과|fn_firstView/.test(html) && /NetFunnel|로그인|점검|대기열|접근|차단|서비스/.test(text)
}
/**
 * Detects a page that carries search-result markers but none of the result
 * cards this parser supports.
 *
 * Returns `true` only when all of the following hold:
 *  - `html` contains one of the markers `resultDiv`, `검색결과` or `fn_firstView`;
 *  - no `<div>` has `result` as a whole word in its class attribute;
 *  - the first `<div>` with `resultDiv` in its class attribute has content for
 *    which `stripTags` returns a non-empty string.
 *
 * The container content is captured lazily, so it ends at the first `</div>`
 * that follows the opening tag.
 *
 * @param {string} html - Raw search page markup.
 * @returns {boolean}
 */
function hasUnparsedCandidateResults(html) {
  const markerPattern = /resultDiv|검색결과|fn_firstView/
  const supportedCardPattern = /<div\b[^>]*class=(['"])[^'"]*\bresult\b[^'"]*\1/i
  const containerPattern = /<div\b[^>]*class=(['"])[^'"]*\bresultDiv\b[^'"]*\1[^>]*>([\s\S]*?)<\/div>/i

  // No markers at all, or at least one supported card: nothing is "unparsed".
  if (!markerPattern.test(html) || supportedCardPattern.test(html)) return false

  const container = String(html || "").match(containerPattern)
  if (container === null) return false

  const innerHtml = container[2]
  return stripTags(innerHtml).length > 0
}
function filterItem(item, options) {
if (options.localOnly && !item.is_local_election) return false
if (options.electionCode && item.election_code !== options.electionCode) return false
@ -236,7 +244,9 @@ function parseSearchHtml(html, options = {}) {
}
const resultRegex = /<div\b([^>]*)class=(['"])[^'"]*\bresult\b[^'"]*\2([^>]*)>([\s\S]*?)(?=<div\b[^>]*class=(['"])[^'"]*\bresult\b|<div\b[^>]*class=(['"])[^'"]*\bpage\b|<\/body>|$)/gi
let parsedResultCards = 0
for (const resultMatch of html.matchAll(resultRegex)) {
parsedResultCards += 1
const resultAttrs = `${resultMatch[1] || ""} ${resultMatch[3] || ""}`
const resultHtml = resultMatch[4]
const nameMatch = resultHtml.match(/<p\b[^>]*class=(['"])[^'"]*\bname\b[^'"]*\1[^>]*>([\s\S]*?)<\/p>/i)
@ -244,7 +254,15 @@ function parseSearchHtml(html, options = {}) {
const strongMatch = nameHtml.match(/<strong[^>]*>([\s\S]*?)<\/strong>/i)
const hanjaMatch = nameHtml.match(/<span\b[^>]*class=(['"])[^'"]*\bhanja\b[^'"]*\1[^>]*>\s*\((.*?)\)\s*<\/span>/i)
const dateMatch = nameHtml.match(/<span\b[^>]*class=(['"])[^'"]*\bdate\b[^'"]*\1[^>]*>([\s\S]*?)<\/span>/i)
const personName = strongMatch ? stripTags(strongMatch[1]) : normalized.name
const personName = strongMatch ? stripTags(strongMatch[1]) : null
if (!personName) {
warnings.push("missing candidate name in NEC result card; skipped result because exact-name matching could not be verified")
continue
}
if (normalizeToken(personName) !== normalizeToken(normalized.name)) {
warnings.push(`candidate name mismatch in NEC result card; expected ${normalized.name} but found ${personName}; skipped result`)
continue
}
const hanja = hanjaMatch ? stripTags(hanjaMatch[2]) : null
const { birthDate, gender } = parseBirthDateAndGender(dateMatch ? stripTags(dateMatch[2]) : stripTags(nameHtml), resultAttrs)
@ -282,6 +300,10 @@ function parseSearchHtml(html, options = {}) {
}
}
if (parsedResultCards === 0 && hasUnparsedCandidateResults(html)) {
warnings.push("parser drift suspected: NEC search result markers were present but no supported result cards could be parsed")
}
const limitedItems = items.slice(0, normalized.limit)
if (limitedItems.length === 0 && warnings.length === 0) warnings.push("no candidate results matched the provided name/filters on the NEC search page")
const result = {

View file

@ -96,7 +96,7 @@ test("buildSearchRequest posts to the official NEC integrated candidate search",
test("parseSearchHtml returns local election candidate entries with profile fields", () => {
const result = parseSearchHtml(SEARCH_HTML, { name: "오세훈" })
assert.equal(result.summary.returned_count, 2)
assert.equal(result.summary.returned_count, 1)
assert.equal(result.items[0].name, "오세훈")
assert.equal(result.items[0].hanja, "吳世勲")
assert.equal(result.items[0].birth_date, "1961-01-04")
@ -108,17 +108,40 @@ test("parseSearchHtml returns local election candidate entries with profile fiel
assert.equal(result.items[0].district, "서울특별시")
assert.equal(result.items[0].job, "서울특별시장")
assert.match(result.items[0].career.join("\n"), /제39대 서울특별시장/)
assert.equal(result.items[1].votes, 2371)
assert.equal(result.items[1].vote_share, "9.55%")
assert.equal(result.warnings.some((warning) => /candidate name mismatch.*김동연/i.test(warning)), true)
})
// With non-local elections included, only cards whose parsed name equals the
// queried name may be returned; the other candidate in the fixture must be
// reported through a mismatch warning instead.
test("parseSearchHtml enforces exact candidate-name matches on mixed result pages", () => {
  const query = { name: "오세훈", localOnly: false }
  const result = parseSearchHtml(SEARCH_HTML, query)

  const returnedNames = result.items.map(({ name }) => name)
  assert.deepEqual(returnedNames, ["오세훈"])
  assert.equal(result.summary.returned_count, 1)

  const warningText = result.warnings.join("\n")
  assert.match(warningText, /candidate name mismatch.*김동연/i)
})
// Dropping the first <strong>오세훈</strong> element from the fixture leaves
// a card with no parseable name; the parser must return no items and warn.
test("parseSearchHtml skips result cards without a parsed candidate name", () => {
  const htmlWithoutName = SEARCH_HTML.replace("<strong>오세훈</strong>", "")
  const { items, warnings } = parseSearchHtml(htmlWithoutName, { name: "오세훈" })

  assert.equal(items.length, 0)
  assert.match(warnings.join("\n"), /missing candidate name/i)
})
// A page that has the resultDiv container but wraps the candidate in markup
// the parser does not recognise must yield no items and a "parser drift" warning.
test("parseSearchHtml warns separately when result markers exist but no cards parse", () => {
  const unsupportedMarkup = `<!doctype html><html><body><div class="resultDiv"><section class="candidate-card">오세훈</section></div></body></html>`
  const { items, warnings } = parseSearchHtml(unsupportedMarkup, { name: "오세훈" })

  assert.equal(items.length, 0)
  assert.match(warnings.join("\n"), /parser drift/i)
})
test("parseSearchHtml filters non-local elections by default and can include all", () => {
const local = parseSearchHtml(SEARCH_HTML, { name: "김동연" })
const all = parseSearchHtml(SEARCH_HTML, { name: "김동연", localOnly: false })
assert.equal(local.items.length, 2)
assert.equal(local.items.length, 1)
assert.equal(local.items.every((item) => item.is_local_election), true)
assert.equal(all.items.length, 3)
assert.equal(all.items.length, 2)
assert.equal(all.items.at(-1).election_type, "국회의원선거")
})
@ -166,3 +189,14 @@ test("CLI prints JSON search results", () => {
assert.equal(data.items.length, 1)
assert.equal(data.items[0].name, "오세훈")
})
// Regression test: running the CLI with --help in a child process must exit
// with status 0 and print the usage banner on stdout.
test("CLI --help exits successfully and prints usage", () => {
  const cliPath = require.resolve("../src/cli")
  const packageRoot = require("node:path").join(__dirname, "..")
  const spawnOptions = { cwd: packageRoot, encoding: "utf8" }

  const { status, stdout, stderr } = spawnSync(process.execPath, [cliPath, "--help"], spawnOptions)

  // stderr is passed as the assertion message so a failing run shows its output.
  assert.equal(status, 0, stderr)
  assert.match(stdout, /Usage: local-election-candidate-search/)
})