mirror of
https://github.com/NomaDamas/k-skill.git
synced 2026-06-24 02:04:11 +00:00
Clarify the smoke-runner trust boundary
Lock the unsandboxed smoke command contract and document that the managed clone is mutable bot state rather than a write-protected boundary.
Constraint: PR #263 follow-up asked for TDD coverage and an update on feature/#257 targeting dev.
Rejected: Hardening the runner into per-skill worktrees in this patch | broader behavior change than the approved documentation/contract follow-up.
Confidence: high
Scope-risk: narrow
Directive: Do not describe the QA clone as write-protected while smoke tests run with sandbox bypass in that clone.
Tested: bats tools/k-skill-qa-bot/test/bats/; shellcheck -e SC1091,SC2016,SC2012 tools/k-skill-qa-bot/bin/*.sh tools/k-skill-qa-bot/bin/lib/*.sh tools/k-skill-qa-bot/install.sh tools/k-skill-qa-bot/uninstall.sh; python3 -m py_compile tools/k-skill-qa-bot/bin/*.py tools/k-skill-qa-bot/bin/lib/*.py; git diff --check origin/dev...HEAD
Not-tested: Live Codex network smoke execution against production skill endpoints
This commit is contained in:
parent
e7dbaacce9
commit
5b2b0e6691
4 changed files with 63 additions and 0 deletions
|
|
@ -22,6 +22,7 @@ The k-skill repository itself is **never modified** by the bot — it is read-on
|
|||
|
||||
- Smoke tests intentionally run unsandboxed and may contact public skill endpoints, plus git, Codex, GitHub, and k-skill-proxy health-check endpoints.
|
||||
- A dedicated LaunchAgent is scheduling isolation only; it is not a separate OS user, container, or filesystem sandbox.
|
||||
- The bot-managed clone is not write-protected from the unsandboxed smoke agent; treat it as mutable bot state rather than a write-protected filesystem boundary.
|
||||
- The judge uses read-only/no-approval Codex settings, but is still a tool-capable Codex agent over untrusted transcripts and skill Markdown. Do not describe it as a no-tools or file-isolated model call unless the implementation changes to enforce that boundary.
|
||||
|
||||
## Design rules
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ bash ~/.local/share/k-skill-qa-bot/uninstall.sh --yes --purge --purge-logs
|
|||
|
||||
- Skill smoke tests use `--dangerously-bypass-approvals-and-sandbox` because the Codex sandbox can block legitimate DNS/network lookups for public skill endpoints exercised by smoke tests.
|
||||
- A dedicated LaunchAgent is scheduling isolation only; it is not a separate OS user, container, or filesystem sandbox.
|
||||
- The bot-managed clone is not write-protected from the unsandboxed smoke agent; treat it as mutable bot state and judge only against inputs whose provenance is understood.
|
||||
- The LLM judge stays on the safer `-s read-only` path with `approval_policy="never"`; read-only/no-approval limits writes and approval prompts, but does not make the judge a no-tools or file-isolated model call. Treat transcript and skill Markdown as untrusted input.
|
||||
- 10 destructive/login-required skills are force-skipped before any codex call is issued.
|
||||
- Deprecated skills (`~~name~~ ⚠️ 지원 중단` in README) are detected and skipped.
|
||||
|
|
|
|||
|
|
@ -16,10 +16,12 @@ setup() {
|
|||
|
||||
@test "README accurately documents smoke-test egress and LaunchAgent boundary" {
|
||||
grep -Fq 'public skill endpoints exercised by smoke tests' "$README"
|
||||
grep -Fq 'bot-managed clone is not write-protected from the unsandboxed smoke agent' "$README"
|
||||
grep -Fq 'A dedicated LaunchAgent is scheduling isolation only; it is not a separate OS user, container, or filesystem sandbox' "$README"
|
||||
}
|
||||
|
||||
@test "QA-bot AGENTS guidance preserves split trust boundary" {
|
||||
grep -Fq 'Smoke tests intentionally run unsandboxed and may contact public skill endpoints' "$AGENTS"
|
||||
grep -Fq 'bot-managed clone is not write-protected from the unsandboxed smoke agent' "$AGENTS"
|
||||
grep -Fq 'The judge uses read-only/no-approval Codex settings, but is still a tool-capable Codex agent over untrusted transcripts and skill Markdown' "$AGENTS"
|
||||
}
|
||||
|
|
|
|||
59
tools/k-skill-qa-bot/test/bats/smoke_command.bats
Normal file
59
tools/k-skill-qa-bot/test/bats/smoke_command.bats
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env bats
|
||||
|
||||
# Per-test fixture: resolves the QA-bot root two directories above this test
# file, creates a scratch directory tree, and installs two stub executables
# (`codex` and `gtimeout`) under $STUB_BIN.
#
# Globals written: QA_BOT_ROOT, TMP, STUB_BIN, CAPTURE
setup() {
  QA_BOT_ROOT="$(cd "$BATS_TEST_DIRNAME/../.." && pwd)"

  TMP="$(mktemp -d)"
  STUB_BIN="$TMP/bin"
  CAPTURE="$TMP/argv.txt"
  mkdir -p "$STUB_BIN" "$TMP/clone" "$TMP/run"

  # Stub codex: writes its arguments, one per line, to the file named by
  # $CODEX_ARGV_CAPTURE, then prints a single agent_message JSON event.
  cat > "$STUB_BIN/codex" <<'CODEX_STUB'
#!/usr/bin/env bash
printf '%s\n' "$@" > "$CODEX_ARGV_CAPTURE"
printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"smoke ok"}}'
CODEX_STUB

  # Stub gtimeout: when invoked with a leading `--kill-after=15`, drops that
  # flag and the argument after it, then execs the remaining command line.
  cat > "$STUB_BIN/gtimeout" <<'GTIMEOUT_STUB'
#!/usr/bin/env bash
if [ "$1" = "--kill-after=15" ]; then
  shift 2
fi
exec "$@"
GTIMEOUT_STUB

  chmod +x "$STUB_BIN/codex" "$STUB_BIN/gtimeout"
}
|
||||
|
||||
# Per-test cleanup: recursively removes the scratch directory named by $TMP.
teardown() {
  rm -rf -- "$TMP"
}
|
||||
|
||||
@test "test-skill keeps smoke codex execution on the documented sandbox-bypass path" {
|
||||
classification='{"name":"demo","skip_reason":null,"default_test_prompt":"run demo smoke"}'
|
||||
|
||||
run env -i HOME="$HOME" PATH="$STUB_BIN:$PATH" CODEX_BIN="codex" CODEX_ARGV_CAPTURE="$CAPTURE" \
|
||||
K_QA_HOME="$TMP/home" K_SKILL_CLONE="$TMP/clone" CODEX_MODEL="smoke-model" CODEX_PROVIDER="smoke-provider" TIMEOUT_SECS="5" \
|
||||
bash -c 'printf "%s" "$0" | "$1" --run-dir "$2"' "$classification" "$QA_BOT_ROOT/bin/test-skill.sh" "$TMP/run"
|
||||
|
||||
[ "$status" -eq 0 ]
|
||||
[ -f "$TMP/run/results/demo.exec.json" ]
|
||||
grep -qx -- 'exec' "$CAPTURE"
|
||||
grep -qx -- '--json' "$CAPTURE"
|
||||
grep -qx -- '--dangerously-bypass-approvals-and-sandbox' "$CAPTURE"
|
||||
grep -qx -- '--skip-git-repo-check' "$CAPTURE"
|
||||
grep -qx -- '--ephemeral' "$CAPTURE"
|
||||
grep -qx -- '-C' "$CAPTURE"
|
||||
grep -qx -- "$TMP/clone" "$CAPTURE"
|
||||
grep -qx -- '-m' "$CAPTURE"
|
||||
grep -qx -- 'smoke-model' "$CAPTURE"
|
||||
grep -qx -- 'model_provider="smoke-provider"' "$CAPTURE"
|
||||
grep -qx -- 'run demo smoke' "$CAPTURE"
|
||||
! grep -qx -- '-s' "$CAPTURE"
|
||||
! grep -qx -- 'read-only' "$CAPTURE"
|
||||
python3 - "$TMP/run/results/demo.exec.json" <<'PY'
|
||||
import json, sys
|
||||
with open(sys.argv[1], encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
assert data["status"] == "executed", data
|
||||
assert data["codex_model"] == "smoke-model", data
|
||||
assert data["test_prompt"] == "run demo smoke", data
|
||||
PY
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue