Preserve unique candidate lookup results

Deduplicate parsed NEC candidate/election rows before applying user limits, and make expected CLI validation failures concise by default while keeping an explicit debug stack escape hatch.

Constraint: PR #266 round-2 follow-up requested TDD fixes for duplicate NEC rows and CLI validation UX.
Rejected: Deduplicating after limit | would still allow duplicates to crowd out unique rows.
Rejected: Always printing stack traces | exposes local paths for normal user-input failures.
Confidence: high
Scope-risk: narrow
Directive: Keep dedupe keys stable enough to avoid collapsing legitimately distinct historical election rows.
Tested: git diff --check; node --test packages/local-election-candidate-search/test/index.test.js; npm run lint --workspace local-election-candidate-search; npm run test --workspace local-election-candidate-search; npm pack --workspace local-election-candidate-search --dry-run; live 오세훈 smoke; live 김동연 duplicate repro; CLI no-args/help.
Not-tested: Full npm run ci remains blocked by pre-existing missing SKILL.md: ohou-today-deal.
This commit is contained in:
Jeffrey (Dongkyu) Kim 2026-05-18 16:35:33 +09:00
commit bdba986e3e
3 changed files with 54 additions and 2 deletions

View file

@ -52,7 +52,9 @@ Options:
}
/**
 * Render an error as text for CLI output.
 *
 * By default only a concise `Error: <message>` line is produced. The full
 * stack is returned only when the LOCAL_ELECTION_CANDIDATE_SEARCH_DEBUG
 * environment variable is set to a non-empty value.
 *
 * @param {unknown} error - The thrown value (an Error or anything else).
 * @returns {string} The stack (debug mode), `Error: <message>`, or `String(error)`.
 */
function formatError(error) {
  // Stack traces are opt-in: they expose local paths and are noise for
  // ordinary user-input failures.
  if (process.env.LOCAL_ELECTION_CANDIDATE_SEARCH_DEBUG && error && error.stack) return error.stack
  if (error && error.message) return `Error: ${error.message}`
  // Non-Error throwables (strings, null, objects without a message).
  return String(error)
}
function run(argv = process.argv.slice(2), io = console) {

View file

@ -234,10 +234,24 @@ function filterItem(item, options) {
return true
}
/**
 * Build the string key that identifies one candidate/election row.
 *
 * The key is the `|`-joined, `cleanText`-normalised values of the row's
 * name, birth date, election name code, election code, party, district,
 * votes and vote share, in that order.
 *
 * @param {object} item - A parsed candidate/election row.
 * @returns {string} The composite key for the row.
 */
function getCandidateElectionKey(item) {
  const identityFields = [
    "name",
    "birth_date",
    "election_name_code",
    "election_code",
    "party",
    "district",
    "votes",
    "vote_share"
  ]
  // Read every field first, then normalise, so property access order and
  // the single-argument cleanText calls stay exactly as before.
  const rawValues = identityFields.map((field) => item[field])
  const keyParts = []
  for (const raw of rawValues) {
    keyParts.push(cleanText(raw))
  }
  return keyParts.join("|")
}
function parseSearchHtml(html, options = {}) {
const normalized = normalizeSearchOptions(options)
const warnings = []
const items = []
const itemKeys = new Set()
const source = { url: NEC_SEARCH_URL, method: "POST", surface: "NEC election statistics integrated candidate search" }
if (isUnexpectedHtml(html)) {
warnings.push(`unexpected NEC search HTML; possible NetFunnel 로그인 점검 block page: ${stripTags(html).slice(0, 160)}`)
@ -296,7 +310,13 @@ function parseSearchHtml(html, options = {}) {
town_code: getHtmlAttr(listAttrs, "data-town-code"),
...profile
})
if (filterItem(item, normalized)) items.push(item)
if (filterItem(item, normalized)) {
const itemKey = getCandidateElectionKey(item)
if (!itemKeys.has(itemKey)) {
itemKeys.add(itemKey)
items.push(item)
}
}
}
}

View file

@ -153,6 +153,23 @@ test("parseSearchHtml supports election/date/region filters", () => {
assert.equal(result.items[0].district, "서울특별시(동작구가선거구)")
})
// Regression test: a repeated candidate/election row in the HTML must be
// collapsed before `limit` is applied, so the duplicate is not counted as a
// second match.
test("parseSearchHtml deduplicates repeated candidate election entries before applying limit", () => {
// Extract the `data-election-type="4"` list block from the fixture; the match
// runs up to the opening of the next list, which is stripped off again below.
const duplicateList = SEARCH_HTML.match(/<div class="list" data-election-type="4"[\s\S]*?<\/div>\s*<\/div>\s*<div class="list" data-election-code="2"/)[0]
.replace(/\s*<div class="list" data-election-code="2"$/, "")
// Insert a second copy of that list block right after the original.
const duplicateHtml = SEARCH_HTML.replace(duplicateList, `${duplicateList}\n${duplicateList}`)
const result = parseSearchHtml(duplicateHtml, {
name: "김동연",
electionCode: "기초의원",
electionDate: "2014",
region: "동작",
limit: 1
})
assert.equal(result.summary.returned_count, 1)
// The duplicate must not be counted as a second match before the limit.
assert.equal(result.summary.matched_before_limit, 1)
assert.deepEqual(result.items.map((item) => item.district), ["서울특별시(동작구가선거구)"])
})
test("parseSearchHtml reports empty and blocked pages as explicit failure modes", () => {
const empty = parseSearchHtml(EMPTY_HTML, { name: "없는후보" })
const blocked = parseSearchHtml(BLOCKED_HTML, { name: "오세훈" })
@ -200,3 +217,16 @@ test("CLI --help exits successfully and prints usage", () => {
assert.equal(proc.status, 0, proc.stderr)
assert.match(proc.stdout, /Usage: local-election-candidate-search/)
})
// Running the CLI with no arguments must fail with exit status 1, print the
// concise validation message on stderr without stack frames, and leave stdout
// empty.
test("CLI expected validation errors print concise messages without stack traces", () => {
  const cli = require.resolve("../src/cli")
  // The CLI prints full stacks when LOCAL_ELECTION_CANDIDATE_SEARCH_DEBUG is
  // set, so strip it from the child's environment; otherwise this test would
  // fail whenever a developer or CI job has the debug flag exported.
  const childEnv = { ...process.env }
  delete childEnv.LOCAL_ELECTION_CANDIDATE_SEARCH_DEBUG
  const proc = spawnSync(process.execPath, [cli], {
    cwd: require("node:path").join(__dirname, ".."),
    encoding: "utf8",
    env: childEnv
  })
  assert.equal(proc.status, 1)
  assert.match(proc.stderr, /Provide a candidate name to search\./)
  // A stack frame line looks like "\n    at fn (file:line:col)".
  assert.doesNotMatch(proc.stderr, /\n\s+at /)
  assert.equal(proc.stdout, "")
})