mirror of
https://github.com/NomaDamas/k-skill.git
synced 2026-06-24 02:04:11 +00:00
Clarify cleaner usage evidence boundaries
The cleanup helper now streams local logs, reports which evidence sources were merged, and keeps README table coverage tied to the central skill-name fixture so the documented cleanup signal stays trustworthy for large local histories and mixed imported counts.

Constraint: Follow-up addresses PR #178 review comments without changing the non-destructive recommendation model.
Rejected: Filtering imported usage JSON by --days inside the helper | imported counts are already aggregated and lack per-record timestamps.
Confidence: high
Scope-risk: narrow
Directive: Keep --usage-json documented as pre-windowed unless the input schema gains timestamped per-record events.
Tested: PYTHONPATH=scripts python3 -m unittest scripts.test_k_skill_cleaner
Tested: node --test scripts/skill-docs.test.js
Tested: npm run lint
Tested: npm run typecheck && npm test
Tested: npm run ci
This commit is contained in:
parent
1935e641a6
commit
0b280839d6
5 changed files with 87 additions and 12 deletions
|
|
@@ -31,4 +31,4 @@ python3 scripts/k_skill_cleaner.py \
|
|||
--keep k-skill-setup,k-skill-cleaner
|
||||
```
|
||||
|
||||
`--days 90`은 최근 90일 window만 카운트한다. timestamp가 없는 로그 줄은 파일 mtime으로 포함/제외를 결정한다. 출력은 파일 삭제를 하지 않는 JSON 리포트다. `zero_triggers`나 `low_usage`만 있는 항목은 바로 삭제하지 말고 검토 후보로 남긴다. `interview_never_use`가 포함된 항목은 사용자의 의도가 확인된 삭제 후보로 보고한다.
|
||||
`--days 90`은 최근 90일 window만 카운트한다. timestamp가 없는 로그 줄은 파일 mtime으로 포함/제외를 결정한다. 단, `--usage-json`으로 넣은 값은 이미 집계된 count로 간주하므로 `--days`/`--since`로 다시 필터링하지 않는다. 같은 기간의 통계를 export하거나 직접 전처리한 JSON을 넣어야 한다. 출력은 `usage_json`과 `scanned_logs` provenance를 포함하고, 파일 삭제를 하지 않는 JSON 리포트다. `zero_triggers`나 `low_usage`만 있는 항목은 바로 삭제하지 말고 검토 후보로 남긴다. `interview_never_use`가 포함된 항목은 사용자의 의도가 확인된 삭제 후보로 보고한다.
|
||||
|
|
|
|||
|
|
@@ -53,12 +53,16 @@ For agent exports or hand-curated counts, pass a JSON object mapping skill name
|
|||
python3 scripts/k_skill_cleaner.py --skills-root . --usage-json usage-counts.json --days 90
|
||||
```
|
||||
|
||||
`--days` and `--since` filter scanned log records only. `--usage-json` values are already-aggregated counts, so prepare/export that JSON for the same time window before passing it to the helper.
|
||||
|
||||
The helper prints JSON with:
|
||||
|
||||
- `skill_count`: number of root-level skills discovered.
|
||||
- `candidates`: ranked `remove` or `review` candidates with `trigger_count` and `reasons`.
|
||||
- `agent_usage_sources`: the agent-specific paths and caveats above.
|
||||
- `time_window`: the effective `--since`/`--days` cutoff and mtime fallback caveat.
|
||||
- `usage_json`: whether imported counts were merged and the pre-windowing caveat.
|
||||
- `scanned_logs`: how many readable log files were scanned and which paths contributed best-effort evidence.
|
||||
- `safety`: reminder that no files were deleted.
|
||||
|
||||
## Recommendation policy
|
||||
|
|
|
|||
|
|
@@ -228,17 +228,21 @@ def collect_skill_usage(
|
|||
path = Path(raw_path).expanduser()
|
||||
if not path.is_file():
|
||||
continue
|
||||
for line in path.read_text(encoding="utf-8", errors="replace").splitlines():
|
||||
parsed: Any | None = None
|
||||
try:
|
||||
parsed = json.loads(line)
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
if not _line_is_in_window(path, line, parsed, since_dt):
|
||||
continue
|
||||
for skill in skills:
|
||||
if (parsed is not None and _json_mentions_skill(parsed, skill)) or _line_mentions_skill(line, skill):
|
||||
counts[skill] += 1
|
||||
try:
|
||||
with path.open(encoding="utf-8", errors="replace") as handle:
|
||||
for line in handle:
|
||||
parsed: Any | None = None
|
||||
try:
|
||||
parsed = json.loads(line)
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
if not _line_is_in_window(path, line, parsed, since_dt):
|
||||
continue
|
||||
for skill in skills:
|
||||
if (parsed is not None and _json_mentions_skill(parsed, skill)) or _line_mentions_skill(line, skill):
|
||||
counts[skill] += 1
|
||||
except OSError:
|
||||
continue
|
||||
return counts
|
||||
|
||||
|
||||
|
|
@@ -365,6 +369,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||
if args.scan_default_logs:
|
||||
log_paths.extend(expand_default_log_paths())
|
||||
since = _resolve_since(args.days, args.since)
|
||||
scanned_log_paths = sorted({str(path.expanduser()) for path in log_paths if path.expanduser().is_file()})
|
||||
log_counts = collect_skill_usage(log_paths, skill_names, since=since)
|
||||
for skill, count in log_counts.items():
|
||||
usage_counts[skill] = usage_counts.get(skill, 0) + count
|
||||
|
|
@@ -382,8 +387,19 @@ def main(argv: list[str] | None = None) -> int:
|
|||
"time_window": {
|
||||
"since": since.isoformat() if since is not None else None,
|
||||
"days": args.days if args.since is None else None,
|
||||
"scope": "Applies to scanned logs only; usage JSON counts are merged as already aggregated/pre-windowed input.",
|
||||
"fallback": "Untimestamped log lines are included or skipped by log file mtime.",
|
||||
},
|
||||
"usage_json": {
|
||||
"applied": args.usage_json is not None,
|
||||
"path": args.usage_json,
|
||||
"caveat": "Usage JSON counts are treated as already aggregated/pre-windowed and are not filtered by --days or --since.",
|
||||
},
|
||||
"scanned_logs": {
|
||||
"count": len(scanned_log_paths),
|
||||
"paths": scanned_log_paths,
|
||||
"caveat": "Unreadable log files are skipped; trigger detection is best-effort.",
|
||||
},
|
||||
"safety": "No files were deleted. Review candidates and remove skills in a separate explicit edit.",
|
||||
}
|
||||
print(json.dumps(report, ensure_ascii=False, indent=2))
|
||||
|
|
|
|||
|
|
@@ -3281,6 +3281,7 @@ const README_SKILL_NAME_COLUMN_MAPPING = [
|
|||
["네이버 뉴스 검색", "naver-news-search"],
|
||||
["한국어 글자 수 세기", "korean-character-count"],
|
||||
["한국어 유행어 글쓰기", "korean-slang-writing"],
|
||||
["K-스킬 클리너", "k-skill-cleaner"],
|
||||
];
|
||||
|
||||
test("README skill table header advertises the new 스킬 이름 column (issue #165)", () => {
|
||||
|
|
@@ -3296,6 +3297,11 @@ test("README skill table header advertises the new 스킬 이름 column (issue #
|
|||
test("README skill table includes inline-code skill names for every documented row (issue #165)", () => {
|
||||
const readme = read("README.md");
|
||||
|
||||
assert.ok(
|
||||
README_SKILL_NAME_COLUMN_MAPPING.some(([, skillName]) => skillName === "k-skill-cleaner"),
|
||||
"expected k-skill-cleaner to be covered by the central README skill-name mapping fixture",
|
||||
);
|
||||
|
||||
for (const [label, skillName] of README_SKILL_NAME_COLUMN_MAPPING) {
|
||||
const escapedLabel = escapeRegex(label);
|
||||
const escapedName = escapeRegex(skillName);
|
||||
|
|
|
|||
|
|
@@ -5,6 +5,7 @@ import sys
|
|||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from k_skill_cleaner import (
|
||||
AGENT_USAGE_SOURCES,
|
||||
|
|
@@ -85,6 +86,54 @@ class KSkillCleanerTest(unittest.TestCase):
|
|||
self.assertEqual(counts["fallback-skill"], 1)
|
||||
self.assertEqual(counts["old-fallback"], 0)
|
||||
|
||||
def test_collect_skill_usage_streams_log_files_without_reading_whole_file(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
log_path = Path(tmp) / "codex.jsonl"
|
||||
log_path.write_text(json.dumps({"skill": "kbo-results"}) + "\n", encoding="utf-8")
|
||||
|
||||
with patch.object(Path, "read_text", side_effect=AssertionError("collect_skill_usage must stream logs")):
|
||||
counts = collect_skill_usage([log_path], ["kbo-results", "unused"])
|
||||
|
||||
self.assertEqual(counts["kbo-results"], 1)
|
||||
self.assertEqual(counts["unused"], 0)
|
||||
|
||||
def test_cli_reports_usage_json_provenance_and_window_caveat(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
root = Path(tmp)
|
||||
skill_dir = root / "kbo-results"
|
||||
skill_dir.mkdir()
|
||||
(skill_dir / "SKILL.md").write_text("---\nname: kbo-results\n", encoding="utf-8")
|
||||
usage_json = root / "usage.json"
|
||||
usage_json.write_text(json.dumps({"kbo-results": 3}), encoding="utf-8")
|
||||
|
||||
result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
"-c",
|
||||
(
|
||||
"import sys; "
|
||||
"from k_skill_cleaner import main; "
|
||||
"sys.exit(main(sys.argv[1:]))"
|
||||
),
|
||||
"--skills-root",
|
||||
str(root),
|
||||
"--usage-json",
|
||||
str(usage_json),
|
||||
"--days",
|
||||
"90",
|
||||
],
|
||||
check=True,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
)
|
||||
report = json.loads(result.stdout)
|
||||
|
||||
self.assertTrue(report["usage_json"]["applied"])
|
||||
self.assertEqual(report["usage_json"]["path"], str(usage_json))
|
||||
self.assertIn("pre-windowed", report["usage_json"]["caveat"])
|
||||
self.assertEqual(report["scanned_logs"]["count"], 0)
|
||||
self.assertIn("usage JSON", report["time_window"]["scope"])
|
||||
|
||||
def test_ranks_deletion_candidates_with_interview_and_usage_reasons(self):
|
||||
candidates = rank_cleanup_candidates(
|
||||
skill_names=["unused", "rare", "protected", "active"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue