mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-17 02:05:57 +00:00
Compare commits
2 commits
main
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9f08fc925d |
||
|
|
34df600e65 |
14 changed files with 1615 additions and 1 deletions
801
agent/skill_distill.py
Normal file
801
agent/skill_distill.py
Normal file
|
|
@ -0,0 +1,801 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Skill distillation engine -- the ``/learn`` / ``hermes learn`` backend.
|
||||
|
||||
Point this at one or more directories of source material (source code, API
|
||||
docs, instruction manuals, PDFs, config samples, READMEs) and it will:
|
||||
|
||||
1. Walk the directories and CLASSIFY each file by source type.
|
||||
2. Build a compact, token-budgeted CORPUS from the highest-signal files.
|
||||
3. SYNTHESIZE a draft ``SKILL.md`` via the auxiliary LLM (main-model-first,
|
||||
cache-safe -- never touches the live conversation or its prompt cache).
|
||||
4. VERIFY the draft in a throwaway sandbox (temp dir, never the user's real
|
||||
skills tree): shell snippets are syntax-checked / dry-run, referenced
|
||||
file paths and commands are existence-checked, frontmatter is validated.
|
||||
5. COMMIT the skill via ``tools.skill_manager_tool._create_skill`` ONLY when
|
||||
verification passes the configured floor, tagging the skill with the
|
||||
verification TIER it actually achieved.
|
||||
|
||||
Design notes / invariants
|
||||
--------------------------
|
||||
* This module performs ZERO model-tool registration. It is invoked from CLI
|
||||
subcommands, gateway slash handlers, the TUI, and the dashboard -- all of
|
||||
which call :func:`distill_skill_from_dirs`. Footprint-ladder rung 2
|
||||
(CLI command + skill-producing engine), not a new core tool.
|
||||
* The LLM synthesis goes through ``agent.auxiliary_client.call_llm`` with a
|
||||
dedicated task name, so it inherits main-model-first resolution and any
|
||||
per-task config override without breaking conversation prompt caching.
|
||||
* Verification is a TIER, not a boolean. We never claim a skill was "tested"
|
||||
when we only parsed it. The achieved tier is recorded in the skill's
|
||||
frontmatter (``metadata.hermes.distill.verification``) and surfaced to the
|
||||
caller so the UI can be honest.
|
||||
* Nothing executes against the user's real ``HERMES_HOME`` during
|
||||
verification. Shell snippets run in an isolated temp working directory with
|
||||
a short timeout, and only when ``run_commands`` is explicitly enabled.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tunables (all overridable via config.yaml -> skills.distill.*; see _cfg)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Per-file read cap so one giant file can't blow the corpus budget.
|
||||
_DEFAULT_MAX_FILE_BYTES = 64_000
|
||||
# Total corpus character budget handed to the synthesis LLM.
|
||||
_DEFAULT_CORPUS_BUDGET = 180_000
|
||||
# Hard cap on files walked, so pointing at a monorepo doesn't hang.
|
||||
_DEFAULT_MAX_FILES = 400
|
||||
# Sandbox shell snippet timeout (seconds).
|
||||
_DEFAULT_SNIPPET_TIMEOUT = 15
|
||||
|
||||
# Directories that never carry distill signal -- skipped wholesale.
|
||||
_SKIP_DIRS = {
|
||||
".git", ".hg", ".svn", "node_modules", "__pycache__", ".venv", "venv",
|
||||
".mypy_cache", ".pytest_cache", ".ruff_cache", "dist", "build",
|
||||
".next", ".cache", "target", "vendor", ".idea", ".vscode", "site-packages",
|
||||
}
|
||||
|
||||
# Binary / noise extensions never read into the corpus.
|
||||
_BINARY_EXT = {
|
||||
".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".svg", ".mp4", ".mov",
|
||||
".mp3", ".wav", ".zip", ".tar", ".gz", ".tgz", ".7z", ".rar", ".bin",
|
||||
".so", ".dylib", ".dll", ".class", ".jar", ".pyc", ".o", ".a", ".woff",
|
||||
".woff2", ".ttf", ".eot", ".lock", ".pdf", # PDFs handled separately
|
||||
}
|
||||
|
||||
# Source-type classification by extension. Order does not matter; first hit wins
|
||||
# via the _CLASSIFY_MAP lookup.
|
||||
_CODE_EXT = {
|
||||
".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs", ".java", ".rb", ".php",
|
||||
".c", ".cpp", ".h", ".hpp", ".cs", ".swift", ".kt", ".scala", ".sh",
|
||||
".bash", ".zsh", ".lua", ".pl", ".r", ".jl", ".sql", ".dart",
|
||||
}
|
||||
_DOC_EXT = {".md", ".mdx", ".rst", ".txt", ".adoc"}
|
||||
_CONFIG_EXT = {
|
||||
".yaml", ".yml", ".toml", ".ini", ".cfg", ".json", ".env", ".properties",
|
||||
".conf", ".xml",
|
||||
}
|
||||
|
||||
# Filename signals that boost a file's priority in corpus selection.
|
||||
_HIGH_SIGNAL_NAMES = (
|
||||
"readme", "api", "openapi", "swagger", "usage", "guide", "tutorial",
|
||||
"getting-started", "quickstart", "howto", "manual", "reference",
|
||||
"endpoints", "schema", "cli", "commands", "examples", "example",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public result types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class SourceFile:
    """One file that was read during ingestion, plus its classification."""

    # Location of the file on disk.
    path: Path
    # Path relative to the ingest root it was found under.
    rel: str
    # Source type: "code" | "doc" | "config" | "pdf"
    kind: str
    # On-disk size in bytes as reported by stat().
    size: int
    # Text that was read from the file (possibly capped).
    text: str = ""
    # Corpus-selection score; higher is included first.
    priority: int = 0
|
||||
|
||||
|
||||
@dataclass
class VerificationResult:
    """What sandboxed verification of a draft skill concluded."""

    # Achieved tier: "executed" | "checked" | "unverified" | "failed"
    tier: str
    # Whether the draft cleared verification.
    passed: bool
    # Human-readable notes for each check that succeeded.
    checks: List[str] = field(default_factory=list)
    # Non-fatal findings.
    warnings: List[str] = field(default_factory=list)
    # Fatal findings.
    errors: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class DistillResult:
    """Outcome of one /learn run, in the shape callers report from."""

    # True only when the skill was committed.
    success: bool
    skill_name: Optional[str] = None
    skill_path: Optional[str] = None
    category: Optional[str] = None
    # Verification outcome, when the run got that far.
    verification: Optional[VerificationResult] = None
    # Number of source files ingested, and the count per source kind.
    sources_ingested: int = 0
    source_breakdown: Dict[str, int] = field(default_factory=dict)
    # True when the verify floor was not met and nothing was committed.
    draft_only: bool = False
    # Draft SKILL.md text, kept when the run did not end in a commit.
    draft_content: Optional[str] = None
    error: Optional[str] = None
    elapsed_seconds: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _cfg(key: str, default: Any) -> Any:
|
||||
"""Read skills.distill.<key> from config.yaml, falling back to default."""
|
||||
try:
|
||||
from hermes_cli.config import cfg_get, load_config
|
||||
|
||||
val = cfg_get(load_config(), "skills", "distill", key, default=None)
|
||||
return default if val is None else val
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 1+2: ingest & classify
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _classify(path: Path) -> Optional[str]:
    """Return the source kind for ``path``, or ``None`` to skip the file.

    The decision is made on the lower-cased extension: PDFs first, then the
    binary deny-list, then the code / doc / config sets. A handful of
    well-known extension-less file names are treated as docs.
    """
    suffix = path.suffix.lower()
    # PDFs are tested before the binary set, which also lists ".pdf".
    if suffix == ".pdf":
        return "pdf"
    if suffix in _BINARY_EXT:
        return None
    by_extension = (
        (_CODE_EXT, "code"),
        (_DOC_EXT, "doc"),
        (_CONFIG_EXT, "config"),
    )
    for extensions, kind in by_extension:
        if suffix in extensions:
            return kind
    # Extension-less high-signal files (e.g. Dockerfile, Makefile) -> doc-ish.
    well_known = ("dockerfile", "makefile", "procfile", "license", "notice")
    return "doc" if path.name.lower() in well_known else None
|
||||
|
||||
|
||||
def _file_priority(path: Path, kind: str) -> int:
    """Score a file for corpus selection; larger scores are included first.

    The score is the sum of three parts: a flat bonus when the file name
    contains any high-signal substring, a per-kind weight, and a bonus that
    shrinks as the number of components in ``path`` grows.
    """
    lowered = path.name.lower()
    name_bonus = 10 if any(sig in lowered for sig in _HIGH_SIGNAL_NAMES) else 0
    # Docs and API specs are the richest distill signal.
    kind_bonus = {"doc": 6, "config": 4, "code": 3, "pdf": 7}.get(kind, 0)
    # Fewer path components -> larger bonus, never negative.
    depth_bonus = max(0, 6 - len(path.parts))
    return name_bonus + kind_bonus + depth_bonus
|
||||
|
||||
|
||||
def _read_pdf_text(path: Path, max_bytes: int) -> str:
    """Best-effort PDF text extraction.

    Tries ``pypdf`` and then ``PyPDF2``; neither is a hard requirement.
    Pages are read until roughly ``max_bytes`` characters have been collected
    and the joined text is cut to ``max_bytes`` characters.

    Returns:
        The extracted text, or ``''`` when no extractor could be imported or
        every available extractor failed on this file.
    """
    extractor_found = False
    for mod_name in ("pypdf", "PyPDF2"):
        try:
            mod = __import__(mod_name)
        except Exception:
            # Not installed (or broken install): try the next candidate.
            continue
        extractor_found = True
        try:
            reader = mod.PdfReader(str(path))
            chunks = []
            total = 0
            for page in reader.pages:
                page_text = page.extract_text() or ""
                chunks.append(page_text)
                total += len(page_text)
                if total >= max_bytes:
                    break
            return "\n".join(chunks)[:max_bytes]
        except Exception as exc:
            # Encrypted / corrupt PDFs land here; keep the reason visible
            # for debugging but stay best-effort.
            logger.debug("%s failed to extract %s: %s", mod_name, path.name, exc)
            continue
    if extractor_found:
        # Distinguish "library missing" from "library present but failed" so
        # the log does not send users off to install something they have.
        logger.info("Could not extract text from PDF %s; skipping text.", path.name)
    else:
        logger.info("No PDF extractor available for %s; skipping text.", path.name)
    return ""
|
||||
|
||||
|
||||
def _decode_text_prefix(raw: bytes) -> str:
    """Decode a (possibly byte-capped) file prefix to text.

    A byte cap can cut a multi-byte UTF-8 character in half. When the only
    decode problem is an incomplete sequence in the last few bytes, those
    bytes are dropped instead of abandoning UTF-8 for the whole file.
    Anything else falls back to latin-1, which cannot fail.
    """
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError as exc:
        # A UTF-8 sequence is at most 4 bytes, so a truncated one leaves at
        # most 3 dangling bytes at the very end of the buffer.
        if exc.end == len(raw) and len(raw) - exc.start <= 3:
            try:
                return raw[: exc.start].decode("utf-8")
            except UnicodeDecodeError:
                pass
    return raw.decode("latin-1", errors="replace")


def ingest_directories(
    dirs: List[str],
    *,
    max_files: Optional[int] = None,
    max_file_bytes: Optional[int] = None,
) -> Tuple[List[SourceFile], List[str]]:
    """Walk ``dirs``, classify and read files.

    Args:
        dirs: directories to walk, or individual files to ingest.
        max_files: cap on classified files across all of ``dirs``; falls back
            to ``skills.distill.max_files``.
        max_file_bytes: per-file read cap; falls back to
            ``skills.distill.max_file_bytes``.

    Returns:
        ``(sources, errors)``. ``errors`` holds human-readable messages for
        missing paths, unreadable files, and hitting the file cap; none of
        them abort the walk of the remaining input.
    """
    max_files = max_files or int(_cfg("max_files", _DEFAULT_MAX_FILES))
    max_file_bytes = max_file_bytes or int(_cfg("max_file_bytes", _DEFAULT_MAX_FILE_BYTES))
    sources: List[SourceFile] = []
    errors: List[str] = []
    seen = 0
    capped = False

    for d in dirs:
        if capped:
            break
        root = Path(os.path.expanduser(d)).resolve()
        if not root.exists():
            errors.append(f"Path does not exist: {d}")
            continue

        if root.is_file():
            # A single file is ingested as a one-entry "walk" of its parent.
            base = root.parent
            walk_iter = [(str(base), [], [root.name])]
        else:
            base = root
            walk_iter = os.walk(root)

        for dirpath, dirnames, filenames in walk_iter:
            # Prune skip dirs in place so os.walk doesn't descend. Sorting
            # makes the set of files that fit under max_files reproducible.
            dirnames[:] = sorted(
                dn for dn in dirnames
                if dn not in _SKIP_DIRS and not dn.startswith(".")
            )
            for fn in sorted(filenames):
                if seen >= max_files:
                    capped = True
                    break
                fpath = Path(dirpath) / fn
                kind = _classify(fpath)
                if not kind:
                    continue
                seen += 1
                try:
                    if kind == "pdf":
                        text = _read_pdf_text(fpath, max_file_bytes)
                        if not text:
                            continue
                    else:
                        # Read only the capped prefix; read_bytes() would pull
                        # the entire file into memory before truncating.
                        with fpath.open("rb") as fh:
                            raw = fh.read(max_file_bytes)
                        text = _decode_text_prefix(raw)
                    rel = os.path.relpath(str(fpath), str(base))
                    sf = SourceFile(
                        path=fpath, rel=rel, kind=kind,
                        size=fpath.stat().st_size, text=text,
                    )
                    # Score on the root-relative path: the absolute path has
                    # so many components that the depth bonus would vanish.
                    sf.priority = _file_priority(Path(rel), kind)
                    sources.append(sf)
                except Exception as e:  # pragma: no cover - defensive
                    errors.append(f"Could not read {fpath}: {e}")
            if capped or seen >= max_files:
                break

    if capped:
        # Reported once, no matter how many input paths were left unvisited.
        errors.append(
            f"Hit max_files={max_files}; stopped ingesting more "
            f"(raise skills.distill.max_files to ingest larger trees)."
        )
    return sources, errors
|
||||
|
||||
|
||||
def build_corpus(sources: List[SourceFile], *, budget: Optional[int] = None) -> str:
    """Concatenate source texts into one budgeted corpus string.

    Files are taken in descending priority (shorter text first on ties), each
    preceded by a ``===== FILE: ... =====`` banner. The first file that does
    not fit ends the corpus: it is included truncated when more than 500
    characters of room remain for its body, and dropped otherwise.

    Args:
        sources: ingested files to draw from.
        budget: character budget; falls back to ``skills.distill.corpus_budget``.
    """
    budget = budget or int(_cfg("corpus_budget", _DEFAULT_CORPUS_BUDGET))
    pieces: List[str] = []
    consumed = 0
    for src in sorted(sources, key=lambda s: (-s.priority, len(s.text))):
        banner = f"\n===== FILE: {src.rel} [{src.kind}] =====\n"
        entry = banner + src.text
        if consumed + len(entry) <= budget:
            pieces.append(entry)
            consumed += len(entry)
            continue
        # Out of room: keep a truncated body only if a useful amount fits.
        room = budget - consumed - len(banner)
        if room > 500:
            pieces.append(banner + src.text[:room] + "\n... [truncated]\n")
        break
    return "".join(pieces)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 3: synthesize draft SKILL.md
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_SYNTH_SYSTEM = (
|
||||
"You are a skill author for the Hermes Agent. You distill source material "
|
||||
"(code, API docs, manuals, PDFs, configs) into ONE reusable SKILL.md: a "
|
||||
"narrow, actionable procedure an agent can follow later. You do not dump "
|
||||
"the source; you extract the reusable how-to."
|
||||
)
|
||||
|
||||
_SYNTH_TEMPLATE = """\
|
||||
Distill the SOURCE MATERIAL below into a single SKILL.md file.
|
||||
|
||||
Output ONLY the SKILL.md content -- nothing before or after, no code fences.
|
||||
|
||||
Required format:
|
||||
---
|
||||
name: <lowercase-hyphenated, <=64 chars>
|
||||
description: <one sentence, <=200 chars, starts with a trigger like "Use when ...">
|
||||
version: 1.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [<a-few>, <relevant>, <tags>]
|
||||
---
|
||||
|
||||
# <Human Title>
|
||||
|
||||
<2-4 sentence overview of what this skill does and when to use it.>
|
||||
|
||||
## When to use
|
||||
<bullet list of trigger conditions>
|
||||
|
||||
## Steps
|
||||
1. <numbered, concrete steps with EXACT commands / endpoints / code where the source provides them>
|
||||
2. ...
|
||||
|
||||
## Verification
|
||||
<how to confirm each step worked -- exact commands to run, expected output>
|
||||
|
||||
## Pitfalls
|
||||
<gotchas discovered in the source material>
|
||||
|
||||
Rules:
|
||||
- Prefer exact commands, endpoint URLs, function signatures, and config keys
|
||||
that appear VERBATIM in the source. Do not invent flags, paths, or APIs.
|
||||
- If the source is prose-only (a manual with no runnable commands), still
|
||||
produce concrete steps but keep them faithful to the document.
|
||||
- Keep it tight. A good skill is scannable, not a re-paste of the docs.
|
||||
|
||||
User intent hint (may be empty): {hint}
|
||||
|
||||
SOURCE MATERIAL:
|
||||
{corpus}
|
||||
"""
|
||||
|
||||
|
||||
def synthesize_skill_md(
    corpus: str,
    *,
    hint: str = "",
    main_runtime: Optional[Dict[str, Any]] = None,
    timeout: Optional[float] = None,
) -> str:
    """Ask the auxiliary LLM for a draft SKILL.md built from ``corpus``.

    The request goes through ``call_llm`` under the ``skill_distill`` task
    name, with temperature and max_tokens read from ``skills.distill.*``.

    Args:
        corpus: assembled source material.
        hint: optional steer; sent as ``(none)`` when empty.
        main_runtime: forwarded to ``call_llm`` unchanged.
        timeout: forwarded to ``call_llm`` only when not ``None``.

    Returns:
        The response text with any wrapping code fence removed, stripped.

    Raises:
        RuntimeError: if the response does not have the expected
            ``choices[0].message.content`` shape.
    """
    from agent.auxiliary_client import call_llm

    user_prompt = _SYNTH_TEMPLATE.format(hint=hint or "(none)", corpus=corpus)
    request: Dict[str, Any] = {
        "task": "skill_distill",
        "messages": [
            {"role": "system", "content": _SYNTH_SYSTEM},
            {"role": "user", "content": user_prompt},
        ],
        "main_runtime": main_runtime,
        "temperature": float(_cfg("temperature", 0.2)),
        "max_tokens": int(_cfg("max_tokens", 4000)),
    }
    # Only pass a timeout when the caller chose one, so call_llm keeps its
    # own default otherwise.
    if timeout is not None:
        request["timeout"] = timeout

    response = call_llm(**request)
    try:
        text = response.choices[0].message.content or ""
    except Exception as e:  # pragma: no cover - defensive
        raise RuntimeError(f"Skill synthesis returned an unusable response: {e}")
    return _strip_code_fences(text).strip()
|
||||
|
||||
|
||||
def _strip_code_fences(text: str) -> str:
|
||||
"""Models sometimes wrap output in ```markdown ... ``` -- strip that."""
|
||||
t = text.strip()
|
||||
if t.startswith("```"):
|
||||
# drop first fence line
|
||||
nl = t.find("\n")
|
||||
if nl != -1:
|
||||
t = t[nl + 1 :]
|
||||
if t.rstrip().endswith("```"):
|
||||
t = t.rstrip()[:-3]
|
||||
return t
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 4: sandboxed verification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
|
||||
_BASH_BLOCK_RE = re.compile(r"```(?:bash|sh|shell|console)\n(.*?)```", re.DOTALL)
|
||||
|
||||
|
||||
def _extract_frontmatter(content: str) -> Optional[Dict[str, Any]]:
|
||||
m = _FRONTMATTER_RE.match(content)
|
||||
if not m:
|
||||
return None
|
||||
try:
|
||||
import yaml
|
||||
|
||||
data = yaml.safe_load(m.group(1))
|
||||
return data if isinstance(data, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _extract_shell_snippets(content: str) -> List[str]:
|
||||
snippets: List[str] = []
|
||||
for block in _BASH_BLOCK_RE.findall(content):
|
||||
for line in block.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
# Drop a leading prompt char.
|
||||
if line.startswith("$ "):
|
||||
line = line[2:]
|
||||
snippets.append(line)
|
||||
return snippets
|
||||
|
||||
|
||||
def verify_skill_draft(
    content: str,
    sources: List[SourceFile],
    *,
    run_commands: bool = False,
    snippet_timeout: Optional[int] = None,
) -> VerificationResult:
    """Verify a draft SKILL.md and report the tier it achieved.

    Tiers (best achievable wins, floor enforced by caller):
      * "executed"   -- at least one shell snippet ran in the sandbox with rc 0
      * "checked"    -- frontmatter valid and the draft contains shell snippets
      * "unverified" -- frontmatter valid but no shell snippets to look at
      * "failed"     -- frontmatter missing/invalid or the name is malformed

    Args:
        content: the draft SKILL.md text.
        sources: the ingested sources (accepted for callers; this function
            does not currently read it).
        run_commands: when True, snippets that pass ``_looks_safe_to_run``
            are executed through the shell in a temporary directory.
        snippet_timeout: per-snippet timeout in seconds; falls back to
            ``skills.distill.snippet_timeout``.

    Returns:
        A ``VerificationResult``; commands missing from PATH, unparseable
        snippets and failed runs are recorded as warnings, not errors.
    """
    snippet_timeout = snippet_timeout or int(_cfg("snippet_timeout", _DEFAULT_SNIPPET_TIMEOUT))
    checks: List[str] = []
    warnings: List[str] = []
    errors: List[str] = []

    def _fail(message: str) -> VerificationResult:
        errors.append(message)
        return VerificationResult("failed", False, checks, warnings, errors)

    # --- frontmatter must be valid (hard gate) ---
    fm = _extract_frontmatter(content)
    if not fm:
        return _fail("Missing or unparseable YAML frontmatter.")
    if not (fm.get("name") and fm.get("description")):
        return _fail("Frontmatter missing required 'name' or 'description'.")
    checks.append("Frontmatter valid (name + description present).")

    name = str(fm["name"])
    if re.match(r"^[a-z0-9][a-z0-9_-]{0,63}$", name) is None:
        return _fail(f"Skill name '{name}' is not a valid lowercase-hyphenated identifier.")
    checks.append(f"Skill name '{name}' is well-formed.")

    # --- collect the command word of every snippet that tokenizes ---
    snippets = _extract_shell_snippets(content)
    referenced_cmds = set()
    for snip in snippets:
        try:
            words = shlex.split(snip)
        except ValueError:
            warnings.append(f"Could not parse shell snippet: {snip!r}")
            continue
        if words:
            referenced_cmds.add(words[0])

    # --- existence checks: do referenced commands exist on PATH? ---
    shell_builtins = {"cd", "echo", "export", "source", "."}
    missing_cmds = [
        cmd for cmd in referenced_cmds
        if cmd not in shell_builtins and shutil.which(cmd) is None
    ]
    if referenced_cmds:
        present = sorted(referenced_cmds - set(missing_cmds))
        if present:
            checks.append(f"Referenced commands on PATH: {', '.join(present)}.")
        if missing_cmds:
            warnings.append(
                "Referenced commands NOT on PATH (may be project-local): "
                + ", ".join(sorted(missing_cmds))
            )

    achieved = "checked" if (snippets or referenced_cmds) else "unverified"

    # --- optional live execution in a throwaway sandbox ---
    if run_commands and snippets:
        sandbox = tempfile.mkdtemp(prefix="hermes_learn_verify_")
        succeeded = 0
        try:
            for snip in snippets:
                # Only run snippets that look read-only / safe to dry-run.
                if not _looks_safe_to_run(snip):
                    warnings.append(f"Skipped (not safe to auto-run): {snip!r}")
                    continue
                try:
                    proc = subprocess.run(
                        snip, shell=True, cwd=sandbox, capture_output=True,
                        text=True, timeout=snippet_timeout,
                    )
                except subprocess.TimeoutExpired:
                    warnings.append(f"Timed out: {snip!r}")
                    continue
                except Exception as e:
                    warnings.append(f"Error running {snip!r}: {e}")
                    continue
                if proc.returncode == 0:
                    succeeded += 1
                    checks.append(f"Ran OK: {snip!r}")
                else:
                    warnings.append(
                        f"Non-zero exit ({proc.returncode}) for {snip!r}: "
                        f"{(proc.stderr or '').strip()[:160]}"
                    )
            if succeeded > 0:
                achieved = "executed"
        finally:
            # The sandbox is removed whether or not the snippets succeeded.
            shutil.rmtree(sandbox, ignore_errors=True)

    passed = achieved in ("executed", "checked", "unverified")
    return VerificationResult(achieved, passed, checks, warnings, errors)
|
||||
|
||||
|
||||
_SAFE_RUN_PREFIXES = (
|
||||
"ls", "cat", "echo", "pwd", "which", "type", "head", "tail", "grep",
|
||||
"find", "wc", "python --version", "python3 --version", "node --version",
|
||||
"npm --version", "go version", "cargo --version", "git --version",
|
||||
"curl --version", "--help", "-h", "help",
|
||||
)
|
||||
|
||||
|
||||
def _looks_safe_to_run(snip: str) -> bool:
|
||||
"""Heuristic: only auto-run obviously read-only/inspection snippets."""
|
||||
s = snip.strip()
|
||||
low = s.lower()
|
||||
# Block anything that mutates or reaches out destructively.
|
||||
bad = ("rm ", "rmdir", "mv ", "dd ", ">", ">>", "sudo", "chmod", "chown",
|
||||
"kill", "pip install", "npm install", "apt", "brew install",
|
||||
"git push", "git commit", "curl -X", "wget ", "mkfs", "shutdown")
|
||||
if any(b in low for b in bad):
|
||||
return False
|
||||
# Allow --help/--version style probes and read-only inspectors.
|
||||
if "--help" in low or "--version" in low or " -h" in low:
|
||||
return True
|
||||
return any(low.startswith(p) for p in _SAFE_RUN_PREFIXES)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 5: stamp + commit
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _stamp_verification(content: str, vr: VerificationResult, n_sources: int) -> str:
    """Record the achieved verification tier inside the skill frontmatter.

    The stamp (``verification``, ``sources_ingested``, ``distilled_at``) is
    written under ``metadata.hermes.distill``:

    * no ``metadata`` key yet -> the whole ``metadata/hermes/distill`` tree
      is appended to the frontmatter;
    * block-style ``metadata:`` with a block-style ``hermes:`` child -> a
      ``distill:`` mapping is inserted as the first child of ``hermes``;
    * any other layout (flow-style mappings, an existing ``distill`` key, or
      an edit that PyYAML rejects) -> a one-line YAML comment is appended to
      the frontmatter instead, so the YAML is never made invalid.

    In every case the text after the frontmatter is left untouched; the
    stamp is never placed in the Markdown body, where a ``#`` line would
    render as a heading.

    Returns:
        ``content`` with the stamp added, or ``content`` unchanged when it
        has no frontmatter block.
    """
    fm_match = _FRONTMATTER_RE.match(content)
    if not fm_match:
        return content

    fm_start, fm_end = fm_match.span(1)
    fm_lines = content[fm_start:fm_end].split("\n")
    fields = [
        f"verification: {vr.tier}",
        f"sources_ingested: {n_sources}",
        f'distilled_at: "{time.strftime("%Y-%m-%d")}"',
    ]
    comment = f"# distill: verification={vr.tier}, sources={n_sources}"

    def _indent_of(line: str) -> str:
        return line[: len(line) - len(line.lstrip(" "))]

    def _distill_block(indent: str) -> List[str]:
        return [f"{indent}distill:"] + [f"{indent}  {entry}" for entry in fields]

    def _parses_as_mapping(lines: List[str]) -> bool:
        # Best-effort guard: without PyYAML there is nothing to check with.
        try:
            import yaml
        except ImportError:
            return True
        try:
            return isinstance(yaml.safe_load("\n".join(lines)), dict)
        except Exception:
            return False

    has_metadata = any(re.match(r"metadata\s*:", ln) for ln in fm_lines)
    has_distill = any(ln.strip().startswith("distill:") for ln in fm_lines)

    stamped: Optional[List[str]] = None
    if not has_metadata:
        stamped = fm_lines + ["metadata:", "  hermes:"] + _distill_block("    ")
    elif not has_distill:
        meta_idx = next(
            (i for i, ln in enumerate(fm_lines) if re.match(r"metadata:\s*$", ln)),
            None,
        )
        if meta_idx is not None:
            level = None  # indentation of metadata's direct children
            for j in range(meta_idx + 1, len(fm_lines)):
                ln = fm_lines[j]
                if not ln.strip():
                    continue
                indent = _indent_of(ln)
                if not indent:
                    break  # reached the next top-level key
                if level is None:
                    level = indent
                if indent == level and re.match(r"hermes:\s*$", ln.strip()):
                    # Match the indentation hermes' existing children use.
                    child_indent = indent + "  "
                    for nxt in fm_lines[j + 1:]:
                        if nxt.strip():
                            if len(_indent_of(nxt)) > len(indent):
                                child_indent = _indent_of(nxt)
                            break
                    stamped = fm_lines[: j + 1] + _distill_block(child_indent) + fm_lines[j + 1:]
                    break

    if stamped is None or not _parses_as_mapping(stamped):
        # A YAML comment is always safe to append inside the frontmatter.
        stamped = fm_lines + [comment]

    return content[:fm_start] + "\n".join(stamped) + content[fm_end:]
|
||||
|
||||
|
||||
def commit_skill(content: str, category: Optional[str] = None) -> Dict[str, Any]:
    """Hand the draft to the skill manager to be created as a real skill.

    The skill name is taken from the draft's frontmatter. ``category`` is
    forwarded to ``_create_skill`` only when it is truthy.

    Returns:
        Whatever ``_create_skill`` returns, or a ``{"success": False, ...}``
        dict when the draft has no usable name.
    """
    from tools.skill_manager_tool import _create_skill

    frontmatter = _extract_frontmatter(content) or {}
    name = str(frontmatter.get("name") or "").strip()
    if not name:
        return {"success": False, "error": "Cannot commit: skill has no name."}

    # Leave the manager's own default in place when no category was given.
    optional = {"category": category} if category else {}
    return _create_skill(name=name, content=content, **optional)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestrator -- the single entry point every surface calls
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def distill_skill_from_dirs(
    dirs: List[str],
    *,
    hint: str = "",
    category: Optional[str] = None,
    run_commands: bool = False,
    min_tier: str = "checked",
    main_runtime: Optional[Dict[str, Any]] = None,
    progress: Optional[Callable[[str], None]] = None,
) -> DistillResult:
    """Ingest dirs -> synthesize -> verify -> commit. The /learn backend.

    Args:
        dirs: directories (or single files) of source material.
        hint: optional free-text steer ("focus on the auth flow").
        category: optional skill category folder.
        run_commands: if True, attempt to execute safe shell snippets in a
            throwaway sandbox (verification tier can reach "executed").
        min_tier: minimum verification tier required to COMMIT. One of
            "executed" | "checked" | "unverified". If the achieved tier is
            below this floor, the result is returned draft-only (not
            written). Unrecognized values are treated as "checked". A draft
            that FAILED verification is never committed, whatever the floor.
        main_runtime: the caller's runtime dict (provider/model) so synthesis
            resolves to the main model.
        progress: optional callback for streaming status lines to a UI;
            exceptions it raises are ignored.

    Returns:
        DistillResult. ``success`` is True only when the skill was committed;
        ``draft_only`` is True when verification blocked the commit.
    """
    t0 = time.time()

    def _say(msg: str) -> None:
        # Progress reporting is cosmetic; a broken callback must not abort
        # the run.
        if progress:
            try:
                progress(msg)
            except Exception:
                pass

    tier_rank = {"failed": 0, "unverified": 1, "checked": 2, "executed": 3}
    if min_tier not in tier_rank:
        logger.warning("Unknown min_tier %r; using 'checked'.", min_tier)
    floor = tier_rank.get(min_tier, 2)

    # 1+2: ingest & corpus
    _say(f"Ingesting {len(dirs)} path(s)...")
    sources, ingest_errors = ingest_directories(dirs)
    if not sources:
        msg = "No readable source material found."
        if ingest_errors:
            msg += " " + "; ".join(ingest_errors[:3])
        return DistillResult(success=False, error=msg, elapsed_seconds=time.time() - t0)

    # Partial ingest problems (missing path, unreadable file, file cap) do
    # not stop the run, but the user should still hear about them.
    for problem in ingest_errors[:3]:
        _say(f"Ingest warning: {problem}")

    breakdown: Dict[str, int] = {}
    for sf in sources:
        breakdown[sf.kind] = breakdown.get(sf.kind, 0) + 1
    _say(
        "Ingested "
        + ", ".join(f"{v} {k}" for k, v in sorted(breakdown.items()))
        + f" ({len(sources)} files)."
    )

    def _outcome(**fields: Any) -> DistillResult:
        # Every post-ingest result carries the same source accounting.
        return DistillResult(
            sources_ingested=len(sources), source_breakdown=breakdown,
            elapsed_seconds=time.time() - t0, **fields,
        )

    corpus = build_corpus(sources)

    # 3: synthesize
    _say("Synthesizing draft skill...")
    try:
        draft = synthesize_skill_md(corpus, hint=hint, main_runtime=main_runtime)
    except Exception as e:
        return _outcome(success=False, error=f"Synthesis failed: {e}")
    if not draft or "---" not in draft:
        return _outcome(
            success=False, error="Synthesis produced no usable SKILL.md.",
            draft_content=draft,
        )

    # 4: verify
    _say("Verifying draft in sandbox...")
    vr = verify_skill_draft(draft, sources, run_commands=run_commands)
    _say(f"Verification tier: {vr.tier}.")

    fm = _extract_frontmatter(draft) or {}
    skill_name = str(fm.get("name") or "").strip() or None

    below_floor = tier_rank.get(vr.tier, 0) < floor
    if below_floor or not vr.passed:
        if below_floor:
            reason = (f"Verification tier '{vr.tier}' is below the required "
                      f"floor '{min_tier}'. Draft not committed.")
        else:
            # Reached only with a floor of rank 0: a failed draft must still
            # not be written to the skills tree.
            reason = "The draft failed verification. Draft not committed."
        return _outcome(
            success=False, skill_name=skill_name, verification=vr,
            draft_only=True, draft_content=draft, error=reason,
        )

    # 5: stamp + commit
    stamped = _stamp_verification(draft, vr, len(sources))
    _say(f"Committing skill '{skill_name}'...")
    commit = commit_skill(stamped, category=category)
    if not commit.get("success"):
        return _outcome(
            success=False, skill_name=skill_name, verification=vr,
            draft_content=stamped,
            error=f"Commit failed: {commit.get('error')}",
        )

    return _outcome(
        success=True, skill_name=skill_name,
        skill_path=commit.get("skill_md") or commit.get("path"),
        category=category, verification=vr,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared result rendering -- used by CLI, gateway, TUI, and dashboard so the
|
||||
# user-facing summary is identical everywhere.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_TIER_BLURB = {
|
||||
"executed": "executed — shell snippets ran successfully in a sandbox",
|
||||
"checked": "checked — frontmatter valid, snippets parse, commands resolved",
|
||||
"unverified": "unverified — valid skill but nothing runnable to test",
|
||||
"failed": "failed — the draft did not pass basic validation",
|
||||
}
|
||||
|
||||
|
||||
def render_distill_result(res: DistillResult, *, markdown: bool = False) -> str:
    """Turn a DistillResult into the summary shown to the user.

    Three shapes are produced: a "Learned skill" report on success, a
    "Draft not committed" report (with verification errors and up to five
    warnings) for draft-only results, and a single "/learn failed" line
    otherwise.

    Args:
        res: the result to describe.
        markdown: False -> plain text (CLI / TUI); True -> the lead-in of the
            first line is wrapped in ``**`` (gateway messengers / dashboard).
    """
    bold = "**" if markdown else ""
    failure_prefix = f"{bold}/learn failed:{bold}"

    # Hard failure that never produced a draft worth showing.
    if res.error and not res.success and not res.draft_only:
        return f"{failure_prefix} {res.error}"

    per_kind = ", ".join(
        f"{count} {kind}" for kind, count in sorted(res.source_breakdown.items())
    )
    vr = res.verification
    out: List[str] = []

    if res.success:
        out.append(f"{bold}Learned skill:{bold} {res.skill_name}")
        if res.skill_path:
            out.append(f" path: {res.skill_path}")
        out.append(f" sources: {res.sources_ingested} files ({per_kind})")
        if vr:
            out.append(f" verification: {_TIER_BLURB.get(vr.tier, vr.tier)}")
        out.append(f" took {res.elapsed_seconds:.1f}s")
    elif res.draft_only:
        out.append(f"{bold}Draft not committed.{bold} {res.error}")
        if vr:
            out.append(f" verification: {_TIER_BLURB.get(vr.tier, vr.tier)}")
            out.extend(f" error: {e}" for e in vr.errors)
            out.extend(f" warning: {w}" for w in vr.warnings[:5])
        out.append(" Re-run with a lower --min-tier to commit anyway, "
                   "or refine the sources.")
    else:
        out.append(f"{failure_prefix} {res.error or 'unknown error'}")
    return "\n".join(out)
|
||||
3
cli.py
3
cli.py
|
|
@ -7480,6 +7480,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
|
|||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
elif canonical == "learn":
|
||||
with self._busy_command("Distilling skill from sources..."):
|
||||
self._handle_learn_command(cmd_original)
|
||||
elif canonical == "memory":
|
||||
self._handle_memory_command(cmd_original)
|
||||
elif canonical == "platforms":
|
||||
|
|
|
|||
|
|
@ -7488,6 +7488,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
if canonical == "skills":
|
||||
return await self._handle_skills_command(event)
|
||||
|
||||
if canonical == "learn":
|
||||
return await self._handle_learn_command(event)
|
||||
|
||||
if canonical == "fast":
|
||||
return await self._handle_fast_command(event)
|
||||
|
||||
|
|
|
|||
|
|
@ -2260,6 +2260,89 @@ class GatewaySlashCommandsMixin:
|
|||
f"on the CLI, or ~/.hermes/pending/skills/{pending_id}.json)")
|
||||
return out
|
||||
|
||||
async def _handle_learn_command(self, event: MessageEvent) -> str:
    """Handle /learn on the gateway — distill a skill from source dirs.

    Usage: /learn <dirpath> [dirpath ...] [--hint text] [--run]
           [--min-tier executed|checked|unverified] [--category name]

    The directories must be readable by the gateway process (paths are
    resolved on the host the gateway runs on, not the sender's machine).
    ``--run`` (live sandbox execution of distilled snippets) is restricted
    to admin users, since it executes shell commands; for non-admins it is
    silently dropped.

    Returns the rendered (markdown) result, or a one-line usage/validation
    message when the arguments are malformed: unparseable quoting, a flag
    missing its value, an unknown ``--min-tier``, or no paths at all.
    """
    import asyncio
    import shlex

    raw_args = event.get_command_args().strip()
    if not raw_args:
        return ("Usage: /learn <dirpath> [dirpath ...] [--hint text] "
                "[--min-tier executed|checked|unverified] [--category name]\n"
                "Distills a reusable skill from directories of source "
                "material (code, docs, PDFs) readable by the gateway host.")
    try:
        tokens = shlex.split(raw_args)
    except ValueError as e:
        return f"/learn: could not parse arguments ({e})."

    # Flags that consume the following token as their value.
    valued_flags = {"--hint": "hint", "--category": "category", "--min-tier": "min_tier"}
    options = {"hint": "", "category": None, "min_tier": "checked"}
    paths: list[str] = []
    run_commands = False

    token_iter = iter(tokens)
    for tk in token_iter:
        if tk in valued_flags:
            value = next(token_iter, None)
            if value is None:
                # Previously a dangling flag was treated as a directory path.
                return f"/learn: {tk} requires a value."
            options[valued_flags[tk]] = value
        elif tk == "--run":
            run_commands = True
        else:
            paths.append(tk)

    if not paths:
        return "/learn needs at least one directory path."

    # Same tier set that `hermes learn --min-tier` accepts.
    valid_tiers = ("executed", "checked", "unverified")
    if options["min_tier"] not in valid_tiers:
        return "/learn: --min-tier must be one of " + ", ".join(valid_tiers) + "."

    # --run executes shell snippets in a sandbox — admin only over the gateway.
    if run_commands:
        is_admin = False
        try:
            from gateway.slash_access import policy_for_source as _policy_for_source

            source = event.source
            user_id = (source.user_id if source else None)
            policy = _policy_for_source(self.config, source)
            # When no admin list is configured (policy.enabled is False),
            # the scope is unrestricted, so allow --run.
            if not getattr(policy, "enabled", False):
                is_admin = True
            elif user_id is not None and policy.is_admin(user_id):
                is_admin = True
        except Exception:
            # Fail closed: if the policy cannot be resolved, do not execute.
            is_admin = False
        if not is_admin:
            run_commands = False  # silently downgrade — verification stays at 'checked'

    # Imported late so usage/validation replies do not load the engine.
    from agent.skill_distill import distill_skill_from_dirs, render_distill_result

    def _run_distill():
        return distill_skill_from_dirs(
            paths,
            hint=options["hint"],
            category=options["category"],
            run_commands=run_commands,
            min_tier=options["min_tier"],
            main_runtime=None,  # main-model-first resolution from config
        )

    # The engine is synchronous; run it in the default executor so the
    # gateway event loop stays responsive.
    loop = asyncio.get_running_loop()
    res = await loop.run_in_executor(None, _run_distill)
    return render_distill_result(res, markdown=True)
|
||||
|
||||
async def _handle_fast_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /fast — mirror the CLI Priority Processing toggle in gateway chats."""
|
||||
from gateway.run import _hermes_home, _load_gateway_config, _resolve_gateway_model
|
||||
|
|
|
|||
|
|
@ -1400,6 +1400,66 @@ class CLICommandsMixin:
|
|||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
handle_skills_slash(cmd, ChatConsole())
|
||||
|
||||
def _handle_learn_command(self, cmd: str):
    """Handle /learn slash command — distill a skill from source dirs.

    Usage: /learn <dirpath> [dirpath ...] [--hint text] [--run]
           [--min-tier executed|checked|unverified] [--category name]

    Prints either the rendered result or a one-line usage/validation
    message (unparseable quoting, a flag missing its value, an unknown
    ``--min-tier``, or no paths). Returns ``None`` in every case.
    """
    # Lightweight arg parse (shlex) — the slash surface doesn't use argparse.
    import shlex
    try:
        tokens = shlex.split(cmd.strip())[1:]  # drop "/learn"
    except ValueError as e:
        print(f" /learn: could not parse arguments ({e}).")
        return

    # Flags that consume the following token as their value.
    valued_flags = {"--hint": "hint", "--category": "category", "--min-tier": "min_tier"}
    options = {"hint": "", "category": None, "min_tier": "checked"}
    paths: list[str] = []
    run_commands = False

    token_iter = iter(tokens)
    for t in token_iter:
        if t in valued_flags:
            value = next(token_iter, None)
            if value is None:
                # Previously a dangling flag was treated as a directory path.
                print(f" /learn: {t} requires a value.")
                return
            options[valued_flags[t]] = value
        elif t == "--run":
            run_commands = True
        else:
            paths.append(t)

    if not paths:
        print(" /learn <dirpath> [dirpath ...] [--hint text] [--run] "
              "[--min-tier executed|checked|unverified] [--category name]")
        return

    # Same tier set that `hermes learn --min-tier` accepts.
    valid_tiers = ("executed", "checked", "unverified")
    if options["min_tier"] not in valid_tiers:
        print(" /learn: --min-tier must be one of " + ", ".join(valid_tiers) + ".")
        return

    # Imported late so usage/validation messages do not load the engine.
    from agent.skill_distill import distill_skill_from_dirs, render_distill_result

    # When an agent is attached, pass the session's model settings through
    # as main_runtime; otherwise pass None.
    main_runtime = None
    if getattr(self, "agent", None):
        main_runtime = {
            "model": getattr(self, "model", None),
            "provider": getattr(self, "provider", None),
            "base_url": getattr(self, "base_url", None),
            "api_key": getattr(self, "api_key", None),
            "api_mode": getattr(self, "api_mode", None),
        }

    res = distill_skill_from_dirs(
        paths,
        hint=options["hint"],
        category=options["category"],
        run_commands=run_commands,
        min_tier=options["min_tier"],
        main_runtime=main_runtime,
        progress=lambda m: print(f" · {m}", flush=True),
    )
    print(render_distill_result(res, markdown=False))
|
||||
|
||||
def _handle_memory_command(self, cmd: str):
|
||||
"""Handle /memory slash command — pending review + approval-gate toggle."""
|
||||
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
||||
|
|
|
|||
|
|
@ -170,6 +170,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
gateway_config_gate="skills.write_approval",
|
||||
subcommands=("search", "browse", "inspect", "install", "audit",
|
||||
"pending", "approve", "reject", "diff", "approval")),
|
||||
CommandDef("learn", "Distill a reusable skill from directories of source material (code, docs, PDFs)",
|
||||
"Tools & Skills",
|
||||
args_hint="<dirpath> [dirpath ...] [--hint text] [--run]"),
|
||||
CommandDef("memory", "Review pending memory writes / toggle the approval gate",
|
||||
"Tools & Skills",
|
||||
args_hint="[pending|approve|reject|approval] [id|on|off]",
|
||||
|
|
|
|||
|
|
@ -296,6 +296,7 @@ from hermes_cli.subcommands.acp import build_acp_parser
|
|||
from hermes_cli.subcommands.tools import build_tools_parser
|
||||
from hermes_cli.subcommands.insights import build_insights_parser
|
||||
from hermes_cli.subcommands.skills import build_skills_parser
|
||||
from hermes_cli.subcommands.learn import build_learn_parser
|
||||
from hermes_cli.subcommands.pairing import build_pairing_parser
|
||||
from hermes_cli.subcommands.plugins import build_plugins_parser
|
||||
from hermes_cli.subcommands.mcp import build_mcp_parser
|
||||
|
|
@ -10957,7 +10958,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
|||
"computer-use",
|
||||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"gui", "desktop", "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
|
||||
"gui", "desktop", "kanban", "learn", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
|
||||
"model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
|
||||
"prompt-size",
|
||||
"send", "sessions", "setup",
|
||||
|
|
@ -11411,6 +11412,59 @@ def cmd_pairing(args):
|
|||
|
||||
pairing_command(args)
|
||||
|
||||
def cmd_learn(args):
    """Distill a reusable skill from directories of source material.

    Reads ``paths``, ``hint``, ``category``, ``run``, ``min_tier`` and
    ``json`` from the parsed ``args`` namespace, runs the distillation
    engine, and prints the outcome.

    With ``--json`` the ONLY thing written to stdout is a single JSON
    object. That includes the "no paths" error, so machine consumers never
    have to parse plain text. Without it, progress lines and a human
    summary are printed.
    """
    import json as _json

    use_json = bool(getattr(args, "json", False))
    paths = list(getattr(args, "paths", None) or [])
    if not paths:
        message = "learn: at least one directory or file path is required."
        if use_json:
            # Same keys as the normal payload so callers can rely on the shape.
            print(_json.dumps({
                "success": False,
                "skill_name": None,
                "skill_path": None,
                "category": None,
                "draft_only": False,
                "sources_ingested": 0,
                "source_breakdown": {},
                "verification": None,
                "error": message,
                "elapsed_seconds": 0.0,
            }, indent=2))
        else:
            print(message)
        return

    # Imported late so argument errors do not load the engine.
    from agent.skill_distill import distill_skill_from_dirs, render_distill_result

    def _progress(msg: str) -> None:
        # Progress chatter would corrupt the JSON document on stdout.
        if not use_json:
            print(f" · {msg}", flush=True)

    res = distill_skill_from_dirs(
        paths,
        hint=getattr(args, "hint", "") or "",
        category=(getattr(args, "category", "") or None),
        run_commands=bool(getattr(args, "run", False)),
        # Fall back to the default floor when the attribute is absent or None.
        min_tier=getattr(args, "min_tier", None) or "checked",
        progress=_progress,
    )

    if not use_json:
        print(render_distill_result(res, markdown=False))
        return

    vr = res.verification
    payload = {
        "success": res.success,
        "skill_name": res.skill_name,
        "skill_path": res.skill_path,
        "category": res.category,
        "draft_only": res.draft_only,
        "sources_ingested": res.sources_ingested,
        "source_breakdown": res.source_breakdown,
        "verification": (
            {
                "tier": vr.tier,
                "passed": vr.passed,
                "checks": vr.checks,
                "warnings": vr.warnings,
                "errors": vr.errors,
            }
            if vr
            else None
        ),
        "error": res.error,
        "elapsed_seconds": round(res.elapsed_seconds, 2),
    }
    print(_json.dumps(payload, indent=2))
|
||||
|
||||
|
||||
def cmd_plugins(args):
|
||||
from hermes_cli.plugins_cmd import plugins_command
|
||||
|
|
@ -11761,6 +11815,11 @@ def main():
|
|||
# =========================================================================
|
||||
build_skills_parser(subparsers, cmd_skills=cmd_skills)
|
||||
|
||||
# =========================================================================
|
||||
# learn command (parser built in hermes_cli/subcommands/learn.py)
|
||||
# =========================================================================
|
||||
build_learn_parser(subparsers, cmd_learn=cmd_learn)
|
||||
|
||||
# =========================================================================
|
||||
# bundles command — skill bundles (alias /<name> for multiple skills)
|
||||
# =========================================================================
|
||||
|
|
|
|||
65
hermes_cli/subcommands/learn.py
Normal file
65
hermes_cli/subcommands/learn.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
"""``hermes learn`` subcommand parser.
|
||||
|
||||
Distills a reusable skill from one or more directories of source material
|
||||
(source code, API docs, instruction manuals, PDFs, configs). Backed by
|
||||
``agent.skill_distill.distill_skill_from_dirs``.
|
||||
|
||||
Handler injected to avoid importing ``main``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable
|
||||
|
||||
|
||||
def build_learn_parser(subparsers, *, cmd_learn: Callable) -> None:
    """Register the ``learn`` subcommand on ``subparsers``.

    ``cmd_learn`` is stored as the parser's ``func`` default, so the caller
    decides which handler runs and this module never imports ``main``.
    """
    parser = subparsers.add_parser(
        "learn",
        help="Distill a reusable skill from directories of source material",
        description=(
            "Point Hermes at one or more directories (source code, API docs, "
            "instruction manuals, PDFs, configs). It ingests the material, "
            "synthesizes a draft SKILL.md, verifies it in a sandbox, and "
            "commits the skill only when verification passes."
        ),
    )

    # (flag-or-name, add_argument keyword arguments), in registration order.
    argument_table = (
        ("paths", {
            "nargs": "+",
            "help": "One or more directories (or files) of source material",
        }),
        ("--hint", {
            "default": "",
            "help": "Free-text steer for the distillation (e.g. 'focus on the auth flow')",
        }),
        ("--category", {
            "default": "",
            "help": "Category folder to place the new skill under",
        }),
        ("--run", {
            "action": "store_true",
            "help": (
                "Attempt to execute safe, read-only shell snippets from the draft "
                "in a throwaway sandbox (verification can reach the 'executed' tier). "
                "Off by default."
            ),
        }),
        ("--min-tier", {
            "default": "checked",
            "choices": ["executed", "checked", "unverified"],
            "help": (
                "Minimum verification tier required to commit the skill "
                "(default: checked). Drafts below the floor are shown but not saved."
            ),
        }),
        ("--json", {
            "action": "store_true",
            "help": "Emit the result as JSON instead of a human summary",
        }),
    )
    for name, options in argument_table:
        parser.add_argument(name, **options)

    parser.set_defaults(func=cmd_learn)
|
||||
|
|
@ -8340,6 +8340,79 @@ async def install_skill_hub(body: SkillInstallRequest, profile: Optional[str] =
|
|||
return {"ok": True, "pid": proc.pid, "name": "skills-install"}
|
||||
|
||||
|
||||
class SkillLearnRequest(BaseModel):
    # Request body for POST /api/skills/learn.

    # Source directories or files; the handler drops blank entries and
    # rejects the request when none remain.
    paths: List[str]
    # Optional free-text steer, forwarded as --hint when non-empty.
    hint: Optional[str] = ""
    # Optional category folder, forwarded as --category when set.
    category: Optional[str] = None
    # When true the handler adds --run to the CLI invocation.
    run: bool = False
    # Verification floor; the handler accepts executed|checked|unverified only.
    min_tier: str = "checked"
    # Profile to run under; takes precedence over the handler's own
    # `profile` parameter.
    profile: Optional[str] = None
|
||||
|
||||
|
||||
def _parse_learn_stdout(stdout: str) -> Optional[dict]:
    """Return the last top-level JSON object found in *stdout*, or None.

    The CLI may print banner lines before the payload. Each ``{`` is tried
    as a candidate start, so stray braces in a banner do not defeat parsing.
    """
    decoder = json.JSONDecoder()
    found = None
    pos = stdout.find("{")
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(stdout, pos)
        except ValueError:
            pos = stdout.find("{", pos + 1)
            continue
        if isinstance(obj, dict):
            found = obj
        # Resume after the decoded value so nested objects are not revisited.
        pos = stdout.find("{", end)
    return found


@app.post("/api/skills/learn")
async def learn_skill(body: SkillLearnRequest, profile: Optional[str] = None):
    """Distill a reusable skill from directories of source material.

    Runs ``hermes learn <paths> --json`` synchronously (in a worker thread so
    the event loop is not blocked) and returns the structured DistillResult.
    The directories are resolved on the host the web server runs on.

    Raises HTTPException 400 for missing paths or an unknown ``min_tier``,
    504 when the subprocess exceeds 600s, and 500 when it cannot be started
    or prints no parseable JSON.
    """
    paths = [p for p in (body.paths or []) if (p or "").strip()]
    if not paths:
        raise HTTPException(status_code=400, detail="at least one path is required")
    if body.min_tier not in ("executed", "checked", "unverified"):
        raise HTTPException(status_code=400, detail="min_tier must be executed|checked|unverified")

    # Request-supplied text must never be parsed as a CLI flag: option values
    # use the --opt=value form, and paths follow a `--` separator, so a path
    # such as "--run" cannot switch on snippet execution.
    args = _profile_cli_args(body.profile or profile) + ["learn",
                                                         "--min-tier", body.min_tier,
                                                         "--json"]
    if body.hint:
        args.append(f"--hint={body.hint}")
    if body.category:
        args.append(f"--category={body.category}")
    if body.run:
        args.append("--run")
    args += ["--", *paths]

    cmd = [sys.executable, "-m", "hermes_cli.main", *args]

    def _run() -> subprocess.CompletedProcess:
        return subprocess.run(
            cmd,
            cwd=str(PROJECT_ROOT),
            capture_output=True,
            text=True,
            timeout=600,
            env={**os.environ, "HERMES_NONINTERACTIVE": "1"},
        )

    try:
        proc = await asyncio.to_thread(_run)
    except subprocess.TimeoutExpired as exc:
        raise HTTPException(status_code=504, detail="learn timed out (600s)") from exc
    except Exception as exc:
        _log.exception("Failed to run skills learn")
        raise HTTPException(status_code=500, detail=f"Failed to run learn: {exc}") from exc

    # The CLI prints the JSON payload as the last JSON object on stdout.
    payload = _parse_learn_stdout((proc.stdout or "").strip())
    if payload is None:
        raise HTTPException(
            status_code=500,
            detail=f"learn produced no parseable result (rc={proc.returncode}). "
                   f"stderr: {(proc.stderr or '')[:300]}",
        )
    return payload
|
||||
|
||||
|
||||
class SkillUninstallRequest(BaseModel):
|
||||
name: str
|
||||
profile: Optional[str] = None
|
||||
|
|
|
|||
184
tests/agent/test_skill_distill.py
Normal file
184
tests/agent/test_skill_distill.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
"""Tests for the skill distillation engine (``/learn`` backend).
|
||||
|
||||
Covers the deterministic, non-LLM machinery: ingest/classify, corpus
|
||||
assembly, sandboxed verification tiers, frontmatter stamping, and the
|
||||
tier-floor gating in the orchestrator. The LLM synthesis step is stubbed so
|
||||
these tests are hermetic (no network, no credentials).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import skill_distill as sd
|
||||
|
||||
|
||||
def _make_src(tmp_path: Path) -> Path:
    """Create a small source tree under ``tmp_path / "src"`` and return it.

    The tree holds two docs, one code file, and a ``node_modules`` directory
    containing a junk file that ingestion is expected to skip.
    """
    root = tmp_path / "src"
    root.mkdir()

    fixtures = {
        "README.md": "# WidgetAPI\nFetch widgets.\n\n## Usage\n```bash\necho hello\nls\n```\n",
        "client.py": "def get_widget(i):\n return i\n",
        "api.md": "# Endpoints\nGET /widgets/{id}\n",
    }
    for filename, content in fixtures.items():
        (root / filename).write_text(content)

    vendored = root / "node_modules"
    vendored.mkdir()
    (vendored / "junk.js").write_text("x" * 500)
    return root
|
||||
|
||||
|
||||
def test_ingest_classifies_and_prunes_skip_dirs(tmp_path):
    """Ingest picks up the docs and code, and never descends into node_modules."""
    sources, errors = sd.ingest_directories([str(_make_src(tmp_path))])
    rel_paths = sorted(entry.rel for entry in sources)

    for expected in ("README.md", "client.py", "api.md"):
        assert expected in rel_paths
    # node_modules must be pruned, never ingested.
    assert not any("node_modules" in rel for rel in rel_paths)
    assert {entry.kind for entry in sources} == {"code", "doc"}
|
||||
|
||||
|
||||
def test_ingest_missing_path_reports_error(tmp_path):
    """A nonexistent path yields no sources and a 'does not exist' error."""
    missing = tmp_path / "nope"
    sources, errors = sd.ingest_directories([str(missing)])
    assert sources == []
    matching = [msg for msg in errors if "does not exist" in msg]
    assert matching
|
||||
|
||||
|
||||
def test_build_corpus_prioritizes_docs_over_code(tmp_path):
    """README (high-signal doc) must appear in the corpus before client.py (code).

    ``str.index`` is used rather than ``str.find``: ``find`` returns -1 for a
    missing name, which would let ``-1 < n`` pass even when README.md was
    never included in the corpus.
    """
    src = _make_src(tmp_path)
    sources, _ = sd.ingest_directories([str(src)])
    corpus = sd.build_corpus(sources)
    assert "README.md" in corpus
    assert "client.py" in corpus
    assert corpus.index("README.md") < corpus.index("client.py")
|
||||
|
||||
|
||||
def test_build_corpus_respects_budget(tmp_path):
    """A small budget keeps the assembled corpus small."""
    ingested, _ = sd.ingest_directories([str(_make_src(tmp_path))])
    limited = sd.build_corpus(ingested, budget=120)
    # budget + one header's slack
    assert len(limited) <= 400
|
||||
|
||||
|
||||
_VALID_DRAFT = """---
|
||||
name: widget-api
|
||||
description: Use when fetching widgets from the WidgetAPI.
|
||||
version: 1.0.0
|
||||
---
|
||||
|
||||
# Widget API
|
||||
|
||||
Use when fetching widgets.
|
||||
|
||||
## Steps
|
||||
1. Inspect:
|
||||
```bash
|
||||
echo hello
|
||||
ls
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
def test_verify_failed_without_frontmatter(tmp_path):
    """A draft with no frontmatter is tiered 'failed' and does not pass."""
    ingested, _ = sd.ingest_directories([str(_make_src(tmp_path))])
    outcome = sd.verify_skill_draft("just prose", ingested)
    assert outcome.tier == "failed"
    assert not outcome.passed
|
||||
|
||||
|
||||
def test_verify_checked_tier_without_running(tmp_path):
    """With run_commands=False a valid draft reaches the 'checked' tier."""
    ingested, _ = sd.ingest_directories([str(_make_src(tmp_path))])
    outcome = sd.verify_skill_draft(_VALID_DRAFT, ingested, run_commands=False)
    # Snippets parse + commands resolve, but nothing ran.
    assert outcome.passed
    assert outcome.tier == "checked"
|
||||
|
||||
|
||||
def test_verify_executed_tier_when_running(tmp_path):
    """With run_commands=True a valid draft reaches the 'executed' tier."""
    ingested, _ = sd.ingest_directories([str(_make_src(tmp_path))])
    outcome = sd.verify_skill_draft(_VALID_DRAFT, ingested, run_commands=True)
    # echo / ls are safe to auto-run -> at least one ran rc 0.
    assert outcome.passed
    assert outcome.tier == "executed"
|
||||
|
||||
|
||||
def test_verify_rejects_bad_name():
    """An invalid frontmatter ``name`` puts the draft in the 'failed' tier."""
    invalid_draft = _VALID_DRAFT.replace("name: widget-api", "name: Not A Valid Name!")
    outcome = sd.verify_skill_draft(invalid_draft, [])
    assert outcome.tier == "failed"
|
||||
|
||||
|
||||
def test_unsafe_snippets_not_run():
    """``_looks_safe_to_run`` accepts read-only commands and rejects the rest."""
    expectations = (
        ("echo hello", True),
        ("ls -la", True),
        ("python3 --version", True),
        ("rm -rf /", False),
        ("pip install evil", False),
        ("curl -X POST http://x", False),
        ("echo x > file", False),
    )
    for snippet, expected in expectations:
        assert sd._looks_safe_to_run(snippet) is expected
|
||||
|
||||
|
||||
def test_stamp_records_verification_tier():
    """Stamping records the tier and leaves the frontmatter parseable."""
    outcome = sd.VerificationResult("checked", True)
    stamped_draft = sd._stamp_verification(_VALID_DRAFT, outcome, 3)
    assert "checked" in stamped_draft
    # Frontmatter must still be parseable after stamping.
    frontmatter = sd._extract_frontmatter(stamped_draft)
    assert frontmatter and frontmatter.get("name") == "widget-api"
|
||||
|
||||
|
||||
def test_orchestrator_blocks_below_floor(tmp_path, monkeypatch):
    """A draft tiered below ``min_tier`` is returned as draft-only, not committed."""
    source_dir = _make_src(tmp_path)
    # Stub synthesis to return a draft with no runnable snippets -> 'unverified'.
    prose_only = "".join([
        "---\nname: prose-skill\n",
        "description: Use when reading a manual with no commands.\n",
        "version: 1.0.0\n---\n\n# Prose Skill\n\nJust prose, no code.\n",
    ])

    def _fake_synthesis(*args, **kwargs):
        return prose_only

    monkeypatch.setattr(sd, "synthesize_skill_md", _fake_synthesis)
    result = sd.distill_skill_from_dirs([str(source_dir)], min_tier="checked")

    assert result.success is False
    assert result.draft_only is True
    assert result.verification is not None
    assert result.verification.tier == "unverified"
|
||||
|
||||
|
||||
def test_orchestrator_commits_when_floor_met(tmp_path, monkeypatch):
    """A draft that meets ``min_tier`` is committed and its path exists on disk."""
    # Isolated HERMES_HOME so the commit writes nowhere real.
    hermes_home = tmp_path / ".hermes"
    (hermes_home / "skills").mkdir(parents=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    source_dir = _make_src(tmp_path)

    def _fake_synthesis(*args, **kwargs):
        return _VALID_DRAFT

    monkeypatch.setattr(sd, "synthesize_skill_md", _fake_synthesis)
    result = sd.distill_skill_from_dirs(
        [str(source_dir)],
        min_tier="checked",
        run_commands=False,
    )

    assert result.success is True, result.error
    assert result.skill_name == "widget-api"
    assert result.skill_path and Path(result.skill_path).exists()
|
||||
|
||||
|
||||
def test_render_distill_result_shapes():
    """Success and draft-only results render their respective headline text."""
    committed = sd.DistillResult(
        success=True,
        skill_name="x",
        skill_path="/p",
        verification=sd.VerificationResult("checked", True),
        sources_ingested=2,
        source_breakdown={"doc": 2},
    )
    plain = sd.render_distill_result(committed)
    assert "Learned skill" in plain
    assert "checked" in plain

    held_back = sd.DistillResult(
        success=False,
        draft_only=True,
        skill_name="x",
        verification=sd.VerificationResult("unverified", True),
        error="below floor",
    )
    rendered_md = sd.render_distill_result(held_back, markdown=True)
    assert "Draft not committed" in rendered_md
|
||||
|
|
@ -652,6 +652,35 @@ export const opsCommands: SlashCommand[] = [
|
|||
}
|
||||
},
|
||||
|
||||
// `/learn` slash command: forwards the raw command to the gateway's
// `slash.exec` RPC and shows whatever text comes back.
{
  help: 'distill a reusable skill from dirs of source material (code, docs, PDFs) — /learn <dir> [dir ...] [--hint text] [--run]',
  name: 'learn',
  run: (arg, ctx, cmd) => {
    // No arguments: show usage locally instead of calling the gateway.
    if (!arg.trim()) {
      return ctx.transcript.sys(
        'usage: /learn <dirpath> [dirpath ...] [--hint text] [--run] [--min-tier executed|checked|unverified]'
      )
    }

    // Immediate feedback while the request is in flight.
    ctx.transcript.sys('distilling skill from sources (synthesize + verify)…')

    ctx.gateway.gw
      // cmd.slice(1) drops the first character (presumably the leading "/").
      .request<SlashExecResponse>('slash.exec', { command: cmd.slice(1), session_id: ctx.sid })
      .then(r => {
        // Ignore the response when the context reports itself stale.
        if (ctx.stale()) {
          return
        }

        const body = r?.output || '/learn: no output'
        const text = r?.warning ? `warning: ${r.warning}\n${body}` : body
        // Long or multi-line results go to the pager; short ones stay inline.
        const long = text.length > 180 || text.split('\n').filter(Boolean).length > 2

        long ? ctx.transcript.page(text, 'Learn') : ctx.transcript.sys(text)
      })
      .catch(ctx.guardedErr)
  }
},
|
||||
|
||||
{
|
||||
help: 'view & toggle plugins (no arg opens the hub; enable/disable <name> for direct toggle)',
|
||||
name: 'plugins',
|
||||
|
|
|
|||
|
|
@ -1120,6 +1120,21 @@ export const api = {
|
|||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ name, profile: profile || undefined }),
|
||||
}),
|
||||
// POST /api/skills/learn — distill a skill from source paths.
// `profile`, when given, is sent inside the JSON body next to the request
// fields; an empty string is sent as undefined (i.e. omitted).
learnSkill: (
  req: {
    paths: string[];
    hint?: string;
    category?: string;
    run?: boolean;
    min_tier?: "executed" | "checked" | "unverified";
  },
  profile?: string,
) =>
  fetchJSON<LearnSkillResult>("/api/skills/learn", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ ...req, profile: profile || undefined }),
  }),
|
||||
updateSkillsFromHub: (profile?: string) =>
|
||||
fetchJSON<ActionResponse>("/api/skills/hub/update", {
|
||||
method: "POST",
|
||||
|
|
@ -1170,6 +1185,25 @@ export interface ActionResponse {
|
|||
update_command?: string;
|
||||
}
|
||||
|
||||
// Response of POST /api/skills/learn; mirrors the JSON printed by
// `hermes learn --json`.
export interface LearnSkillResult {
  // True when the skill was committed.
  success: boolean;
  skill_name: string | null;
  skill_path: string | null;
  category: string | null;
  // True when a draft was produced but not committed.
  draft_only: boolean;
  // Number of source files ingested.
  sources_ingested: number;
  // Count of ingested files per source kind (e.g. "doc", "code").
  source_breakdown: Record<string, number>;
  // Null when no verification result was produced.
  verification: {
    tier: "executed" | "checked" | "unverified" | "failed";
    passed: boolean;
    checks: string[];
    warnings: string[];
    errors: string[];
  } | null;
  error: string | null;
  // Rounded to two decimals by the CLI.
  elapsed_seconds: number;
}
|
||||
|
||||
export interface DebugShareResponse {
|
||||
ok: boolean;
|
||||
// label -> paste URL, e.g. { Report: "https://paste.rs/abc", "agent.log": "..." }
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import type {
|
|||
SkillHubInstalledEntry,
|
||||
SkillHubPreview,
|
||||
SkillHubScan,
|
||||
LearnSkillResult,
|
||||
} from "@/lib/api";
|
||||
import { useProfileScope } from "@/contexts/useProfileScope";
|
||||
import { ToolsetConfigDrawer } from "@/components/ToolsetConfigDrawer";
|
||||
|
|
@ -136,6 +137,14 @@ export default function SkillsPage() {
|
|||
// Skill editor dialog: open + which skill is being edited (null = create).
|
||||
const [editorOpen, setEditorOpen] = useState(false);
|
||||
const [editorSkill, setEditorSkill] = useState<string | null>(null);
|
||||
// "Learn from sources" dialog state.
|
||||
const [learnOpen, setLearnOpen] = useState(false);
|
||||
const [learnPaths, setLearnPaths] = useState("");
|
||||
const [learnHint, setLearnHint] = useState("");
|
||||
const [learnMinTier, setLearnMinTier] = useState<"executed" | "checked" | "unverified">("checked");
|
||||
const [learnRun, setLearnRun] = useState(false);
|
||||
const [learnBusy, setLearnBusy] = useState(false);
|
||||
const [learnResult, setLearnResult] = useState<LearnSkillResult | null>(null);
|
||||
const { toast, showToast } = useToast();
|
||||
const { t } = useI18n();
|
||||
const { setAfterTitle, setEnd } = usePageHeader();
|
||||
|
|
@ -229,6 +238,45 @@ export default function SkillsPage() {
|
|||
[selectedProfile, showToast],
|
||||
);
|
||||
|
||||
/* ---- Learn from sources (distill a skill from directories) ---- */
|
||||
// Submit the "Learn from sources" dialog: parse the path list, call the
// API, store the result for display, and refresh the skills list on success.
const handleLearn = useCallback(async () => {
  // Paths may be newline- or comma-separated; blanks are dropped.
  const paths = learnPaths
    .split(/[\n,]+/)
    .map((p) => p.trim())
    .filter(Boolean);
  if (paths.length === 0) {
    showToast("Enter at least one directory path", "error");
    return;
  }
  setLearnBusy(true);
  setLearnResult(null);
  try {
    const res = await api.learnSkill(
      {
        paths,
        hint: learnHint || undefined,
        run: learnRun,
        min_tier: learnMinTier,
      },
      selectedProfile || undefined,
    );
    setLearnResult(res);
    if (res.success) {
      showToast(`Learned skill: ${res.skill_name}`, "success");
      // Best-effort refresh: the skill is already committed, so a failed
      // reload is deliberately not reported as an error.
      api
        .getSkills(selectedProfile || undefined)
        .then(setSkills)
        .catch(() => {});
    } else {
      showToast(res.error || "Distillation did not commit a skill", "error");
    }
  } catch (err) {
    // Surface the failure reason (timeout, bad path, server error) instead
    // of discarding it.
    const detail = err instanceof Error && err.message ? `: ${err.message}` : "";
    showToast(`Learn request failed${detail}`, "error");
  } finally {
    setLearnBusy(false);
  }
}, [learnPaths, learnHint, learnRun, learnMinTier, selectedProfile, showToast]);
|
||||
|
||||
/* ---- Derived data ---- */
|
||||
const lowerSearch = search.toLowerCase();
|
||||
const isSearching = search.trim().length > 0;
|
||||
|
|
@ -501,6 +549,15 @@ export default function SkillsPage() {
|
|||
>
|
||||
New skill
|
||||
</Button>
|
||||
<Button
|
||||
size="xs"
|
||||
outlined
|
||||
className="uppercase"
|
||||
onClick={() => setLearnOpen(true)}
|
||||
prefix={<Sparkles />}
|
||||
>
|
||||
Learn from sources
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</CardHeader>
|
||||
|
|
@ -631,6 +688,103 @@ export default function SkillsPage() {
|
|||
onClose={() => setEditorOpen(false)}
|
||||
onSaved={handleEditorSaved}
|
||||
/>
|
||||
<Dialog open={learnOpen} onOpenChange={(o) => { if (!o) setLearnOpen(false); }}>
|
||||
<DialogContent className="max-w-lg">
|
||||
<DialogHeader>
|
||||
<DialogTitle className="flex items-center gap-2">
|
||||
<Sparkles className="h-4 w-4" /> Learn a skill from sources
|
||||
</DialogTitle>
|
||||
<DialogDescription>
|
||||
Point Hermes at directories of source material (code, API docs,
|
||||
manuals, PDFs). It distills a draft skill, verifies it in a
|
||||
sandbox, and commits it when verification passes. Paths are
|
||||
resolved on the host the dashboard runs on.
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
<div className="flex flex-col gap-3">
|
||||
<div>
|
||||
<label className="text-xs uppercase text-muted-foreground">
|
||||
Source paths (one per line, or comma-separated)
|
||||
</label>
|
||||
<textarea
|
||||
className="mt-1 w-full h-24 rounded-none border bg-transparent p-2 text-xs font-mono"
|
||||
placeholder={"/path/to/repo\n/path/to/api-docs"}
|
||||
value={learnPaths}
|
||||
onChange={(e) => setLearnPaths(e.target.value)}
|
||||
disabled={learnBusy}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-xs uppercase text-muted-foreground">Hint (optional)</label>
|
||||
<Input
|
||||
className="mt-1 h-8 rounded-none text-xs"
|
||||
placeholder="e.g. focus on the auth flow"
|
||||
value={learnHint}
|
||||
onChange={(e) => setLearnHint(e.target.value)}
|
||||
disabled={learnBusy}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<div>
|
||||
<label className="text-xs uppercase text-muted-foreground">Minimum verification tier</label>
|
||||
<select
|
||||
className="mt-1 block h-8 rounded-none border bg-transparent px-2 text-xs"
|
||||
value={learnMinTier}
|
||||
onChange={(e) => setLearnMinTier(e.target.value as "executed" | "checked" | "unverified")}
|
||||
disabled={learnBusy}
|
||||
>
|
||||
<option value="unverified">unverified (commit any valid draft)</option>
|
||||
<option value="checked">checked (parse + commands resolve)</option>
|
||||
<option value="executed">executed (snippets ran in sandbox)</option>
|
||||
</select>
|
||||
</div>
|
||||
<label className="flex items-center gap-2 text-xs">
|
||||
<Switch checked={learnRun} onCheckedChange={setLearnRun} disabled={learnBusy} />
|
||||
Run snippets in sandbox
|
||||
</label>
|
||||
</div>
|
||||
{learnResult && (
|
||||
<div className="rounded-none border p-2 text-xs">
|
||||
{learnResult.success ? (
|
||||
<div className="flex items-center gap-2 text-green-500">
|
||||
<CheckCircle2 className="h-3.5 w-3.5" />
|
||||
Learned <span className="font-mono">{learnResult.skill_name}</span>
|
||||
{learnResult.verification && (
|
||||
<Badge tone="secondary" className="text-[10px]">
|
||||
{learnResult.verification.tier}
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-start gap-2 text-amber-500">
|
||||
<AlertTriangle className="h-3.5 w-3.5 mt-0.5" />
|
||||
<span>{learnResult.error || "Not committed"}</span>
|
||||
</div>
|
||||
)}
|
||||
<div className="mt-1 text-muted-foreground">
|
||||
{learnResult.sources_ingested} files ingested
|
||||
{learnResult.elapsed_seconds
|
||||
? ` · ${learnResult.elapsed_seconds.toFixed(1)}s`
|
||||
: ""}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex justify-end gap-2 pt-1">
|
||||
<Button outlined size="xs" onClick={() => setLearnOpen(false)} disabled={learnBusy}>
|
||||
Close
|
||||
</Button>
|
||||
<Button
|
||||
size="xs"
|
||||
onClick={handleLearn}
|
||||
disabled={learnBusy}
|
||||
prefix={learnBusy ? <Loader2 className="animate-spin" /> : <Sparkles />}
|
||||
>
|
||||
{learnBusy ? "Distilling…" : "Distill skill"}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
<PluginSlot name="skills:bottom" />
|
||||
</div>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -71,6 +71,69 @@ hermes chat --toolsets skills -q "What skills do you have?"
|
|||
hermes chat --toolsets skills -q "Show me the axolotl skill"
|
||||
```
|
||||
|
||||
## Learning skills from sources (`/learn`)
|
||||
|
||||
Instead of writing a skill by hand, you can point Hermes at directories of
|
||||
source material — source code, API docs, instruction manuals, PDFs, config
|
||||
samples — and have it **distill a reusable skill for you**. Hermes ingests the
|
||||
material, synthesizes a draft `SKILL.md`, **verifies it in a throwaway
|
||||
sandbox**, and commits the skill only when verification reaches the configured minimum tier.
|
||||
|
||||
```bash
|
||||
# CLI subcommand:
|
||||
hermes learn ~/projects/widget-api ~/docs/widget-openapi --hint "focus on the auth flow"
|
||||
|
||||
# In-session slash command (CLI, TUI, and every messaging platform):
|
||||
/learn ~/projects/widget-api --hint "the fetch + create flow"
|
||||
```
|
||||
|
||||
It also lives in the dashboard: open the **Skills** tab and click
|
||||
**Learn from sources**.
|
||||
|
||||
### Verification tiers
|
||||
|
||||
`/learn` never claims a skill was "tested" when it was only parsed. Each
|
||||
distilled skill is stamped with the verification **tier** it actually reached:
|
||||
|
||||
| Tier | Meaning |
|
||||
|------|---------|
|
||||
| `executed` | Shell snippets from the draft actually ran (rc 0) in a sandbox |
|
||||
| `checked` | Frontmatter valid, snippets parse, referenced commands resolve on PATH |
|
||||
| `unverified` | A valid skill, but nothing runnable to test |
|
||||
| `failed` | The draft did not pass basic validation (not committed) |
|
||||
|
||||
By default the commit floor is `checked` — drafts below it are shown but not
|
||||
saved. Lower it with `--min-tier unverified` to commit any valid draft, or
|
||||
raise expectations with `--min-tier executed` (snippets must actually run; see `--run` below).
|
||||
|
||||
### Running snippets in the sandbox
|
||||
|
||||
Pass `--run` (CLI/TUI) or toggle **Run snippets in sandbox** (dashboard) to let
|
||||
Hermes execute the draft's read-only inspection snippets (`--version`,
|
||||
`--help`, `ls`, etc.) in an isolated temp directory, so it can reach the
|
||||
`executed` tier. Destructive or network-mutating commands are never auto-run.
|
||||
Over a messaging gateway, `--run` is restricted to admin users since it
|
||||
executes shell commands on the gateway host.
|
||||
|
||||
### Configuration
|
||||
|
||||
`/learn` resolves its synthesis model the same way every auxiliary task does
|
||||
(main-model-first). Tune limits under `skills.distill` in `config.yaml`:
|
||||
|
||||
```yaml
|
||||
skills:
|
||||
distill:
|
||||
max_files: 400 # cap on files walked per run
|
||||
max_file_bytes: 64000 # per-file read cap
|
||||
corpus_budget: 180000 # total chars handed to the synthesis model
|
||||
snippet_timeout: 15 # sandbox shell-snippet timeout (seconds)
|
||||
temperature: 0.2
|
||||
max_tokens: 4000
|
||||
```
|
||||
|
||||
To route distillation through a specific model regardless of your main model,
|
||||
set `auxiliary.skill_distill.provider` / `auxiliary.skill_distill.model`.
|
||||
|
||||
## Progressive Disclosure
|
||||
|
||||
Skills use a token-efficient loading pattern:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue