Protect published tracking examples from leaking carrier contact data

The CJ official response can embed staff name and phone data in crgNm, so the published skill and feature examples now emit only normalized summary fields. The root docs regression suite also locks the safe output shape so raw CJ event payloads do not creep back into the docs.

Constraint: Official CJ tracking responses may include personal contact data in crgNm
Rejected: Keep a message field with ad-hoc redaction | deleting the field is safer for published examples
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Keep carrier doc examples limited to normalized summary fields; do not dump raw tracking payloads without privacy review
Tested: node --test scripts/skill-docs.test.js; npx --yes skills add . --list; npm run ci; CJ official curl+cookie+_csrf verification for 1234567890; 우체국 curl --http1.1 --tls-max 1.2 verification for 1234567890123; lsp_diagnostics scripts/skill-docs.test.js
Not-tested: Live carrier responses beyond the documented smoke-test invoice numbers
This commit is contained in:
Jeffrey (Dongkyu) Kim 2026-03-27 00:56:47 +09:00
commit 5b104d27e2
3 changed files with 53 additions and 13 deletions

View file

@ -226,24 +226,26 @@ normalized_events = [
"time": time_,
"location": clean(location),
"status": clean(status),
"detail": clean(detail),
}
for day, time_, location, status, detail in events
]
latest_event = normalized_events[-1] if normalized_events else None
print({
"carrier": "epost",
"tracking_no": clean(summary.group("tracking")),
"delivery_result": clean(summary.group("result")),
"delivered_to": clean(summary.group("delivered_to")),
"status": clean(summary.group("result")),
"event_count": len(normalized_events),
"latest_event": normalized_events[-1] if normalized_events else None,
"latest_event_date": latest_event.get("date") if latest_event else None,
"latest_event_time": latest_event.get("time") if latest_event else None,
"latest_event_location": latest_event.get("location") if latest_event else None,
})
PY
rm -f "$tmp_html"
```
우체국 기본정보 테이블은 `등기번호`, `보내는 분/접수일자`, `받는 분`, `수령인/배달일자`, `취급구분`, `배달결과` 순서를 사용하고, 상세 이벤트는 `processTable` 아래 `날짜 / 시간 / 발생국 / 처리현황` 행을 읽으면 된다.
우체국 기본정보 테이블은 `등기번호`, `보내는 분/접수일자`, `받는 분`, `수령인/배달일자`, `취급구분`, `배달결과` 순서를 사용하고, 상세 이벤트는 `processTable` 아래 `날짜 / 시간 / 발생국 / 처리현황` 행을 읽으면 된다. published 예시는 `tracking_no`, 현재 상태, 이벤트 수, 최신 이벤트 시각/위치처럼 배송 상태에 필요한 값만 남기고 수령인/상세 메모 원문은 그대로 노출하지 않는다.
### 3. Normalize for humans

View file

@ -147,15 +147,41 @@ if not summary:
# Reduce an HTML fragment to plain text: every tag is replaced by a space,
# HTML entities are decoded, and whitespace runs (including leading and
# trailing whitespace) are collapsed so words are separated by single spaces.
def clean(raw: str) -> str:
return " ".join(html.unescape(re.sub(r"<[^>]+>", " ", raw)).split())
# Extract one tuple per table row: date (YYYY.MM.DD), time (HH:MM), the raw
# third cell, the text inside <span class="evtnm">, and whatever follows that
# span in the same cell. re.S lets ".*?" span line breaks inside a cell.
# NOTE(review): `page` is defined outside this excerpt — presumably the
# fetched tracking HTML; confirm against the surrounding script.
events = re.findall(
r"<tr>\s*<td>(\d{4}\.\d{2}\.\d{2})</td>\s*"
r"<td>(\d{2}:\d{2})</td>\s*"
r"<td>(.*?)</td>\s*"
r"<td>\s*<span class=\"evtnm\">(.*?)</span>(.*?)</td>\s*</tr>",
page,
re.S,
)
# Keep only date, time, cleaned location and cleaned status for each row.
# `detail` is unpacked from the tuple but intentionally not copied into the
# output dict.
normalized_events = [
{
"date": day,
"time": time_,
"location": clean(location),
"status": clean(status),
}
for day, time_, location, status, detail in events
]
# Last matched row, or None when the regex matched no rows.
latest_event = normalized_events[-1] if normalized_events else None
# Print a flat summary dict; the latest_event_* fields fall back to None when
# there are no events.
# NOTE(review): `summary` is a match object created outside this excerpt;
# the meaning of groups 1 and 6 cannot be verified from here — confirm
# against the summary regex.
print({
"carrier": "epost",
"tracking_no": clean(summary.group(1)),
"delivery_result": clean(summary.group(6)),
"status": clean(summary.group(6)),
"event_count": len(normalized_events),
"latest_event_date": latest_event.get("date") if latest_event else None,
"latest_event_time": latest_event.get("time") if latest_event else None,
"latest_event_location": latest_event.get("location") if latest_event else None,
})
PY
rm -f "$tmp_html"
```
우체국은 HTML 응답이라 기본정보 `table_col` 과 상세 `processTable` 을 파싱해야 한다.
우체국은 HTML 응답이라 기본정보 `table_col` 과 상세 `processTable` 을 파싱해야 한다. published 예시는 현재 상태, 이벤트 수, 최신 이벤트 시각/위치처럼 배송 상태 확인에 필요한 필드만 남기고 수령인/상세 메모 원문은 그대로 보여주지 않는다.
## 결과 정리 기준

View file

@ -191,17 +191,29 @@ test("delivery-tracking docs avoid raw CJ personal fields in published examples"
const skill = read(path.join("delivery-tracking", "SKILL.md"));
const featureDoc = read(path.join("docs", "features", "delivery-tracking.md"));
assert.doesNotMatch(skill, /"message":\s*latest\.get\("crgNm"\)/);
assert.doesNotMatch(
featureDoc,
/print\(json\.dumps\(payload\["parcelDetailResultMap"\]\["resultList"\]\[-1\],\s*ensure_ascii=False,\s*indent=2\)\)/,
);
for (const doc of [skill, featureDoc]) {
assert.doesNotMatch(doc, /"message":\s*latest\.get\("crgNm"\)/);
assert.doesNotMatch(
doc,
/print\(json\.dumps\(payload\["parcelDetailResultMap"\]\["resultList"\]\[-1\],\s*ensure_ascii=False,\s*indent=2\)\)/,
);
assert.match(doc, /"status_code":\s*latest\.get\("crgSt"\)/);
assert.match(doc, /"status":\s*status_map\.get\(latest\.get\("crgSt"\),/);
assert.match(doc, /"timestamp":\s*latest\.get\("dTime"\)/);
assert.match(doc, /"location":\s*latest\.get\("regBranNm"\)/);
assert.match(doc, /"event_count":\s*len\(events\)/);
}
assert.doesNotMatch(skill, /"delivered_to":\s*clean\(summary\.group\("delivered_to"\)\)/);
assert.doesNotMatch(skill, /"latest_event":\s*normalized_events\[-1\]\s*if normalized_events else None/);
assert.doesNotMatch(featureDoc, /"delivered_to":/);
assert.doesNotMatch(featureDoc, /"delivery_result":/);
for (const doc of [skill, featureDoc]) {
assert.match(doc, /"status":\s*clean\(summary\.group\(/);
assert.match(doc, /"event_count":\s*len\(normalized_events\)/);
assert.match(doc, /"latest_event_date":\s*latest_event\.get\("date"\) if latest_event else None/);
assert.match(doc, /"latest_event_time":\s*latest_event\.get\("time"\) if latest_event else None/);
assert.match(doc, /"latest_event_location":\s*latest_event\.get\("location"\) if latest_event else None/);
}
});