mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-05 22:49:28 +00:00
feat(adam): smarter signals & clustering
- New signal types in hooks/adam-observe.mjs:
  - silent_drift: 5 consecutive read-only PostToolUse without an action tool
  - error_after_recovery: same error fingerprint returns within 5 events of clean_recovery
- Severity-weighted scoring in adam/scripts/adam-score.mjs:
  - SEVERITY_DIVISORS exported per struggle signal type
  - Per-session severity_sum + severity_by_type added to JSON output
- Skill-attribution clustering in agents/adam.md:
  - Sub-cluster struggle signals on active_skills[0]
  - New struggle-driven skill_edit variant (always queues, never auto-applies)
- Rubric updates:
  - +1 for cluster severity-sum >= 10, additional +1 for >= 32
  - +1 for skill-attributed sub-cluster naming an existing skill
  - silent_drift + error_after_recovery added to struggle signal list
- Window: silent_drift 14d, error_after_recovery 30d
- Tests: 94 passing (78-82 new)

Backward compat: entries without count default to severity 1. Existing win-driven skill_edit gate untouched. No journal migration.
This commit is contained in:
@@ -43,10 +43,32 @@ export const NEGATIVE_SIGNAL_TYPES = new Set([
|
||||
"retry_loop",
|
||||
"build_loop",
|
||||
"weak_agent",
|
||||
"silent_drift",
|
||||
"error_after_recovery",
|
||||
]);
|
||||
|
||||
export const REINFORCEMENT_THRESHOLD = 3;
|
||||
|
||||
// Severity divisor per struggle signal type. Severity = max(1, floor(count / divisor)).
// Entries without a usable `count`, and signal types with no divisor listed here
// (e.g. `silent_drift`, `error_after_recovery`), default to severity 1.
// Source of truth — referenced by agents/adam.md (Confidence rubric → severity-sum bullets).
export const SEVERITY_DIVISORS = {
  dead_end: 8,
  edit_churn: 4,
  tool_error_loop: 3,
  retry_loop: 3,
  weak_agent: 2,
  build_loop: 1,
};

/**
 * Compute the severity weight of a single journal entry.
 *
 * @param {object} entry - Journal entry. `entry.type` selects the divisor from
 *   `SEVERITY_DIVISORS`; `entry.count` is the raw counter recorded with the signal.
 * @returns {number} A finite integer >= 1. Falls back to 1 for non-object entries,
 *   types without a divisor, and missing / non-finite / non-positive counts.
 */
export function entrySeverity(entry) {
  if (!entry || typeof entry !== "object") return 1;

  // Own-property lookup only: a plain `SEVERITY_DIVISORS[type]` would resolve inherited
  // members such as "constructor" or "toString" to functions and yield NaN below.
  if (!Object.prototype.hasOwnProperty.call(SEVERITY_DIVISORS, entry.type)) return 1;
  const divisor = SEVERITY_DIVISORS[entry.type];
  if (typeof divisor !== "number" || !(divisor > 0)) return 1;

  // Number.isFinite rejects NaN and ±Infinity (JSON.parse("1e999") yields Infinity),
  // which would otherwise make the per-session sum non-serialisable.
  const count = Number.isFinite(entry.count) && entry.count > 0 ? entry.count : 1;
  return Math.max(1, Math.floor(count / divisor));
}
|
||||
|
||||
function parseArgs(argv) {
|
||||
const args = { home: null, input: null, help: false };
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
@@ -84,11 +106,22 @@ export function computeSessionScores(entries) {
|
||||
const sid = e.session || e.session_id || "";
|
||||
if (!sid) continue;
|
||||
if (!bySession.has(sid)) {
|
||||
bySession.set(sid, { session_id: sid, negative_count: 0, task_completed_count: 0 });
|
||||
bySession.set(sid, {
|
||||
session_id: sid,
|
||||
negative_count: 0,
|
||||
task_completed_count: 0,
|
||||
severity_sum: 0,
|
||||
severity_by_type: {},
|
||||
});
|
||||
}
|
||||
const slot = bySession.get(sid);
|
||||
if (e.type === "task_completed") slot.task_completed_count++;
|
||||
else if (NEGATIVE_SIGNAL_TYPES.has(e.type)) slot.negative_count++;
|
||||
else if (NEGATIVE_SIGNAL_TYPES.has(e.type)) {
|
||||
slot.negative_count++;
|
||||
const sev = entrySeverity(e);
|
||||
slot.severity_sum += sev;
|
||||
slot.severity_by_type[e.type] = (slot.severity_by_type[e.type] || 0) + sev;
|
||||
}
|
||||
}
|
||||
const out = [];
|
||||
for (const slot of bySession.values()) {
|
||||
|
||||
@@ -29,6 +29,8 @@ export const SIGNAL_WINDOWS_DAYS = {
|
||||
build_loop: 30,
|
||||
weak_agent: 30,
|
||||
subagent_dispatch_pattern: 30,
|
||||
silent_drift: 14,
|
||||
error_after_recovery: 30,
|
||||
correction_free_streak: 60,
|
||||
clean_recovery: 60,
|
||||
task_completed: 60,
|
||||
|
||||
@@ -1388,6 +1388,105 @@ else
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Test 78: silent_drift fires after 5 consecutive read-only tools ---
|
||||
echo "Test 78: silent_drift after 5 reads"
|
||||
reset_state
|
||||
for i in 1 2 3 4 5; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/r-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sSD\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
assert_grep "$ROOT/journal.jsonl" '"type":"silent_drift"' "5 consecutive reads emit silent_drift"
|
||||
assert_grep "$ROOT/journal.jsonl" '"read_count":5' "silent_drift entry records read_count"
|
||||
|
||||
# --- Test 79: silent_drift counter resets on action tool ---
|
||||
echo "Test 79: silent_drift counter resets on action tool"
|
||||
reset_state
|
||||
for i in 1 2 3 4; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/r-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sSDR\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# Action tool — should reset
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Edit","tool_input":{"file_path":"/tmp/x"},"tool_response":{"content":"ok"},"session_id":"sSDR","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
for i in 1 2 3 4; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/rb-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sSDR\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
if grep -qE '"type":"silent_drift"' "$ROOT/journal.jsonl"; then
|
||||
echo " FAIL: silent_drift fired despite action tool reset"; FAIL=$((FAIL+1))
|
||||
else
|
||||
echo " PASS: silent_drift suppressed by intervening action tool"; PASS=$((PASS+1))
|
||||
fi
|
||||
|
||||
# --- Test 80: error_after_recovery fires when same fp returns post-clean_recovery ---
|
||||
echo "Test 80: error_after_recovery fires when fp returns after recovery"
|
||||
reset_state
|
||||
# Build a tool_error_loop with ENOENT
|
||||
for i in 1 2 3; do
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat missing"},"tool_response":{"is_error":true,"content":"cat: missing: No such file or directory"},"session_id":"sEAR","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# 3 clean tools → clean_recovery
|
||||
for i in 1 2 3; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/ok-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sEAR\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# Same fp returns within window
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat other"},"tool_response":{"is_error":true,"content":"cat: other: No such file or directory"},"session_id":"sEAR","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
assert_grep "$ROOT/journal.jsonl" '"type":"error_after_recovery"' "same fp after clean_recovery emits error_after_recovery"
|
||||
|
||||
# --- Test 81: error_after_recovery does NOT fire after window expires ---
|
||||
echo "Test 81: error_after_recovery suppressed beyond window"
|
||||
reset_state
|
||||
for i in 1 2 3; do
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat missing"},"tool_response":{"is_error":true,"content":"cat: missing: No such file or directory"},"session_id":"sEARW","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
for i in 1 2 3; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/ok-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sEARW\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# UserPromptSubmit resets tools_since_user + last_errors so the burn reads don't
|
||||
# trigger a secondary dead_end + clean_recovery cycle (which would create a fresh
|
||||
# recovery within window and cause error_after_recovery to fire legitimately).
|
||||
echo '{"hook_event_name":"UserPromptSubmit","prompt":"keep going","session_id":"sEARW","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
# Burn through the 5-event window with 6 clean reads (session_post_count: 6 → 12)
|
||||
for i in 1 2 3 4 5 6; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/burn-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sEARW\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat other"},"tool_response":{"is_error":true,"content":"cat: other: No such file or directory"},"session_id":"sEARW","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
if grep -qE '"type":"error_after_recovery"' "$ROOT/journal.jsonl"; then
|
||||
echo " FAIL: error_after_recovery fired outside 5-event window"; FAIL=$((FAIL+1))
|
||||
else
|
||||
echo " PASS: error_after_recovery suppressed outside window"; PASS=$((PASS+1))
|
||||
fi
|
||||
|
||||
# --- Test 82: adam-score.mjs reports severity_sum + severity_by_type ---
|
||||
echo "Test 82: severity-sum reporting in score.mjs"
|
||||
SEV_TMP="$(mktemp)"
|
||||
cat > "$SEV_TMP" <<'EOF'
|
||||
{"ts":"2026-05-12T10:00:00Z","session":"sSEV","type":"dead_end","count":64}
|
||||
{"ts":"2026-05-12T10:01:00Z","session":"sSEV","type":"edit_churn","count":8}
|
||||
{"ts":"2026-05-12T10:02:00Z","session":"sSEV","type":"tool_error_loop","count":3,"fp":"ENOENT:abc"}
|
||||
EOF
|
||||
out=$(SCORE_RUN --input "$SEV_TMP" 2>/dev/null)
|
||||
rm -f "$SEV_TMP"
|
||||
# Expected: dead_end 64/8=8, edit_churn 8/4=2, tool_error_loop 3/3=1 → sum=11
|
||||
if echo "$out" | grep -q '"severity_sum":11'; then
|
||||
echo " PASS: severity_sum=11 reported"; PASS=$((PASS+1))
|
||||
else
|
||||
echo " FAIL: severity_sum mismatch (got: $out)"; FAIL=$((FAIL+1))
|
||||
fi
|
||||
if echo "$out" | grep -q '"dead_end":8'; then
|
||||
echo " PASS: severity_by_type.dead_end=8"; PASS=$((PASS+1))
|
||||
else
|
||||
echo " FAIL: severity_by_type.dead_end missing/wrong (got: $out)"; FAIL=$((FAIL+1))
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "Results: $PASS passed, $FAIL failed"
|
||||
[ "$FAIL" = "0" ]
|
||||
|
||||
+32
-3
@@ -38,6 +38,8 @@ Per-signal windows (single source of truth: `SIGNAL_WINDOWS_DAYS` in `~/.claude/
|
||||
| `build_loop` | 30 d | build/test failure patterns |
|
||||
| `weak_agent` | 30 d | subagent quality signal |
|
||||
| `subagent_dispatch_pattern` | 30 d | dispatch routing pattern |
|
||||
| `silent_drift` | 14 d | exploration-without-action is task-local |
|
||||
| `error_after_recovery` | 30 d | recovery-then-same-error patterns persist |
|
||||
| `correction_free_streak` | 60 d | wins accumulate slowly |
|
||||
| `clean_recovery` | 60 d | wins accumulate slowly |
|
||||
| `task_completed` | 60 d | recipe wins accumulate slowly |
|
||||
@@ -59,6 +61,8 @@ The hook emits these `type` values into the journal:
|
||||
| `edit_churn` | same file edited 4× in window | file basename |
|
||||
| `build_loop` | 2 build/test/compile commands fail in session | session |
|
||||
| `subagent_dispatch_pattern` | same subagent dispatched ≥3× cumulatively | subagent_type |
|
||||
| `silent_drift` | 5 consecutive read-only PostToolUse without an action tool (reset on action or UserPromptSubmit) | `active_skills[0]` |
|
||||
| `error_after_recovery` | same error fingerprint returns within 5 PostToolUse of a `clean_recovery` | (`recovered_from`, `original_fp`) |
|
||||
| `correction_free_streak` | 5 clean UserPromptSubmits in a row (no correction phrase) | `active_skills[0]` |
|
||||
| `clean_recovery` | 3 clean PostToolUse events after a `tool_error_loop`/`dead_end`/`retry_loop` | (`recovered_from`, `active_skills[0]`) |
|
||||
| `task_completed` | UserPromptSubmit closes a run of ≥5 tool calls with ≥3 distinct tool kinds and 0 corrections | sorted `tool_kinds` tuple |
|
||||
@@ -84,10 +88,17 @@ The hook emits these `type` values into the journal:
|
||||
- `edit_churn`: cluster by file basename pattern (e.g. `*.test.ts`).
|
||||
- `build_loop`: cluster by `session`.
|
||||
- `subagent_dispatch_pattern`: cluster by `subagent_type`.
|
||||
- `silent_drift`: cluster by `active_skills[0]` (empty string when no skill is active).
|
||||
- `error_after_recovery`: cluster by (`recovered_from`, `original_fp`).
|
||||
- `correction_free_streak`: cluster by `active_skills[0]`. Treat ≥3 streaks across ≥2 sessions naming the same skill as cross-session evidence.
|
||||
- `clean_recovery`: cluster by (`recovered_from`, `active_skills[0]`). A win cluster qualifies for `skill_edit` only when the named skill exists in `skills_root`.
|
||||
- `task_completed`: cluster by sorted `tool_kinds` tuple (the multi-tool recipe). Single entry qualifies for `skill_new` proposal (drafting protocol applies). Cross-session evidence requires ≥2 entries from distinct sessions with same tuple — without it, proposal queues, never auto-applies. Run the existing skill-overlap rule before drafting: if the recipe matches an existing skill's name/description tokens, route to `skill_edit` instead.
|
||||
5. **Multi-axis correlation**: for each session that produced ≥2 distinct struggle types (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`), tag clusters from that session as `multi_axis: true`. This grants +1 confidence at scoring.
|
||||
5. **Multi-axis correlation**: for each session that produced ≥2 distinct struggle types (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`, `silent_drift`, `error_after_recovery`), tag clusters from that session as `multi_axis: true`. This grants +1 confidence at scoring.
|
||||
|
||||
5b. **Skill-attribution sub-clustering**: after primary clustering (step 4), for every struggle cluster (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`, `silent_drift`, `error_after_recovery`) that contains entries with non-empty `active_skills[0]`:
|
||||
- Split into per-skill sub-clusters keyed on `active_skills[0]`. Entries with empty `active_skills` stay in the original cluster.
|
||||
- If a sub-cluster has ≥3 entries AND names a skill that exists in `skills_root`, mark it as a candidate for `skill_edit` (struggle-driven variant; see "Struggle-driven `skill_edit` eligibility"). Otherwise treat the parent cluster normally.
|
||||
- The umbrella cluster (cross-skill) still emits its usual proposal type (memory, etc.) — sub-clusters do NOT replace it, they supplement it.
|
||||
6. For each cluster qualifying under the rubric — ≥3 occurrences across ≥2 sessions, OR (for struggle types) ≥1 entry within a single session, OR (for `correction`) ≥3 occurrences across ≥2 cwds:
|
||||
a. If cluster topic matches a rejected idea via the rejected-ideas fuzzy set (≥2 token overlap with rejection's `# Why`), skip with reason `"rejected-similar"`.
|
||||
b. Pull ~20 messages of transcript context from `transcripts_root` to enrich. Never read full transcripts.
|
||||
@@ -254,6 +265,21 @@ A `skill_edit` proposal sets `auto_apply_eligible: true` ONLY when ALL hold:
|
||||
|
||||
If any of (3)–(9) fails: still emit the proposal, but `auto_apply_eligible: false` — main thread queues for review.
|
||||
|
||||
## Struggle-driven `skill_edit` eligibility
|
||||
|
||||
Skill-attribution sub-clustering (step 5b) produces struggle-driven `skill_edit` candidates: a sub-cluster of ≥3 struggle entries all naming the same `active_skills[0]` that exists in `skills_root`. These proposals are emitted but **ALWAYS queue** — `auto_apply_eligible: false` regardless of confidence. Negative evidence on a skill is a weaker basis for self-modification than positive evidence (the skill may be active during friction caused by something else), so the human reviews every one.
|
||||
|
||||
A struggle-driven `skill_edit` proposal MUST:
|
||||
|
||||
1. Set `target` to the matched skill's `SKILL.md` path.
|
||||
2. Cluster severity-sum ≥ 10 (same threshold as the +1 rubric bullet).
|
||||
3. Sub-cluster names exactly one skill (no ambiguity across distinct `active_skills[0]` values).
|
||||
4. `# Proposed change` is an append-only diff adding a `## When struggling` section (naive default body: a checkpoint-or-pause rule appropriate to the dominant signal — e.g. `dead_end` → "After 16 PostToolUse events without UserPromptSubmit, emit a one-line checkpoint summary before continuing.").
|
||||
5. Frontmatter includes `struggle_evidence: "<ts of one source entry naming this skill>"` and `struggle_signals: [<list of signal types in the sub-cluster>]`. The win-driven `win_evidence` field is omitted.
|
||||
6. Subject to the same Per-(skill, fingerprint) cooldown as win-driven `skill_edit`.
|
||||
|
||||
If gate (2) or (3) fails: skip the sub-cluster (the parent cluster still produces its umbrella proposal). The sub-cluster's `source_entries` overlap with the parent's — the apply pipeline handles dedup via the excluded-timestamps set.
|
||||
|
||||
## Per-(skill, fingerprint) cooldown
|
||||
|
||||
The cooldown gate is keyed on **(target_skill, proposal_fingerprint)** — not on target_skill alone. A rejected/applied proposal for skill `X` with fingerprint `A` does NOT block future proposals for skill `X` with fingerprint `B`.
|
||||
@@ -307,9 +333,12 @@ The clustering trace summary (see §"Clustering trace") adds an extra `regressio
|
||||
|
||||
Sum:
|
||||
- Signal repeated ≥3× across ≥2 sessions: **+2**
|
||||
- Struggle signal (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`) appearing ≥1× within a single session: **+2** *(each struggle entry already represents a hook-side threshold crossing — e.g. 8 tools without a prompt, 3 same-args retries, 4 edits to one file. Treat each entry as one piece of evidence. Does not stack with the cross-session bonus.)*
|
||||
- Struggle signal (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`, `silent_drift`, `error_after_recovery`) appearing ≥1× within a single session: **+2** *(each struggle entry already represents a hook-side threshold crossing — e.g. 8 tools without a prompt, 3 same-args retries, 4 edits to one file, 5 read-only tools in a row, same-fp error after a recovery. Treat each entry as one piece of evidence. Does not stack with the cross-session bonus.)*
|
||||
- Transcript contains positive endorsement (`yes`, `exactly`, `do that`, `keep doing`) within 2 messages of related action: **+2**
|
||||
- Multi-axis cluster (≥2 distinct struggle types in same session): **+1**
|
||||
- Cluster severity-sum ≥ 10 (severity per entry = `max(1, floor(count / divisor))` using `SEVERITY_DIVISORS` from `adam-score.mjs` — `dead_end:8, edit_churn:4, tool_error_loop:3, retry_loop:3, weak_agent:2, build_loop:1`; entries without `count`, and signal types with no divisor such as `silent_drift` and `error_after_recovery`, count as 1): **+1**
|
||||
- Cluster severity-sum ≥ 32: **+1** *(additive — a severity-sum of 32 gets +1 from the previous bullet AND +1 here, total +2.)*
|
||||
- Skill-attributed sub-cluster (≥3 entries naming the same `active_skills[0]` that exists in `skills_root`): **+1**
|
||||
- Type-bias penalty from feedback loop (≥3 rejections, applied:rejected ratio <1:2 for this `type`): **-1**
|
||||
- Diagnosis flags `Mismatch: unclear` (causation could not be reconstructed from transcript context): **-1**
|
||||
- Blast radius: low **+1**, medium **0**, high **-1** (default per type — see Proposal types table)
|
||||
@@ -328,7 +357,7 @@ Sum:
|
||||
|---|---|---|---|
|
||||
| `memory` | `~/.claude/projects/-Users-nvm/memory/*.md` | low | yes if conf≥4 AND cross_session |
|
||||
| `skill_new` | new dir under `~/.claude/skills/` | low | yes if conf≥4 AND cross_session |
|
||||
| `skill_edit` | existing skill file | medium | yes if win-evidence + LOC + cooldown gates all pass (see "Win-driven skill_edit eligibility") |
|
||||
| `skill_edit` | existing skill file | medium | yes (win-driven only) if win-evidence + LOC + cooldown gates all pass (see "Win-driven skill_edit eligibility"); struggle-driven variant ALWAYS queues (see "Struggle-driven skill_edit eligibility") |
|
||||
| `nudge` | append to `~/.claude/adam/active-nudges.json` | low | yes when `dead_end_count ≥ 3` in a single session (single-session evidence sufficient; skips cross-session gate). Does NOT modify skills/memories/CLAUDE.md — only seeds a SessionStart reminder for a future session. |
|
||||
| `reinforcement` | append entry to `~/.claude/adam/reinforcements.jsonl` | low | yes if conf≥4 AND blast_radius=low (same gate as memory). Applies via `adam-apply-reinforcement.mjs`; appends one JSONL entry, no code/memory/skill changes. |
|
||||
| `agent_new` | new file under `~/.claude/agents/` | medium | no |
|
||||
|
||||
+59
-1
@@ -87,6 +87,12 @@ function normalizeErrorText(text) {
|
||||
const ERROR_RE = /\b(error|failed|exception|traceback|denied|cannot|unable to|not found|undefined|nullpointer|typeerror|syntaxerror|panic|fatal|enoent|econnrefused|etimedout|eaccess|segfault|crashed|uncaught)\b/i;
|
||||
const BUILD_RE = /\b(build|compile|make|gradle|cargo|tsc|webpack|vite|rollup|pytest|jest|mocha|vitest|go\s+test|npm\s+test|yarn\s+test|npm\s+run\s+build|yarn\s+build|ctest|ninja|bazel)\b/i;
|
||||
const EDIT_TOOLS = new Set(["Edit", "Write", "MultiEdit", "NotebookEdit"]);
|
||||
// Tool names treated as read-only for silent_drift detection: a PostToolUse whose
// tool is in this set increments state.silentDriftCounter; any other tool resets
// the counter and the emitted flag.
const READ_ONLY_TOOLS = new Set([
|
||||
"Read", "Grep", "Glob", "ToolSearch", "WebFetch", "WebSearch",
|
||||
"mcp__filepuff__file_read", "mcp__filepuff__file_search",
|
||||
"mcp__filepuff__find_definition", "mcp__filepuff__find_references",
|
||||
"mcp__filepuff__ast_query", "mcp__filepuff__symbol_at", "mcp__filepuff__ping",
|
||||
]);
|
||||
const WINDOW_SIZE = 10;
|
||||
const RETRY_THRESHOLD = 3;
|
||||
const AGENT_RESPAWN_THRESHOLD = 2;
|
||||
@@ -98,6 +104,9 @@ const BUILD_LOOP_THRESHOLD = 2;
|
||||
const SUBAGENT_DISPATCH_THRESHOLD = 3;
|
||||
const CORRECTION_FREE_THRESHOLD = 5;
|
||||
const CLEAN_RECOVERY_WINDOW = 3;
|
||||
const SILENT_DRIFT_THRESHOLD = 5;
|
||||
const ERROR_AFTER_RECOVERY_WINDOW = 5;
|
||||
const RECENT_RECOVERIES_MAX = 3;
|
||||
const STRUGGLE_TYPES = new Set(["tool_error_loop", "dead_end", "retry_loop"]);
|
||||
const ACTIVE_SKILLS_LOOKBACK = 10;
|
||||
const TASK_TOOL_MIN = 5;
|
||||
@@ -268,6 +277,8 @@ function resetFrictionCounters(state) {
|
||||
state.edit_churn_emitted = {};
|
||||
state.build_failure_count = 0;
|
||||
state.build_loop_emitted = false;
|
||||
state.silentDriftCounter = 0;
|
||||
state.silentDriftEmitted = false;
|
||||
}
|
||||
|
||||
function resetSessionLocal(state) {
|
||||
@@ -276,6 +287,8 @@ function resetSessionLocal(state) {
|
||||
state.subagent_dispatch_emitted = {};
|
||||
state.correctionFreeCounter = 0;
|
||||
state.recoveryWatch = null;
|
||||
state.recentRecoveries = [];
|
||||
state.session_post_count = 0;
|
||||
state.tool_window = [];
|
||||
state.task_tool_kinds = {};
|
||||
state.task_tool_count = 0;
|
||||
@@ -299,6 +312,10 @@ function ensureStateDefaults(state) {
|
||||
if (!state.task_tool_kinds || typeof state.task_tool_kinds !== "object") state.task_tool_kinds = {};
|
||||
if (typeof state.task_tool_count !== "number") state.task_tool_count = 0;
|
||||
if (typeof state.task_corrections !== "number") state.task_corrections = 0;
|
||||
if (typeof state.silentDriftCounter !== "number") state.silentDriftCounter = 0;
|
||||
if (typeof state.silentDriftEmitted !== "boolean") state.silentDriftEmitted = false;
|
||||
if (!Array.isArray(state.recentRecoveries)) state.recentRecoveries = [];
|
||||
if (typeof state.session_post_count !== "number") state.session_post_count = 0;
|
||||
}
|
||||
|
||||
function main() {
|
||||
@@ -402,12 +419,24 @@ function main() {
|
||||
}
|
||||
state.tool_window.push(windowEntry);
|
||||
if (state.tool_window.length > WINDOW_SIZE) state.tool_window.shift();
|
||||
state.session_post_count += 1;
|
||||
|
||||
const sameToolArgs = state.tool_window.filter(e => e.tool === tool && e.argsHash === argsHash).length;
|
||||
if (sameToolArgs >= RETRY_THRESHOLD) {
|
||||
emit({ ts, session, cwd, type: "retry_loop", tool, count: sameToolArgs });
|
||||
}
|
||||
|
||||
if (READ_ONLY_TOOLS.has(tool)) {
|
||||
state.silentDriftCounter += 1;
|
||||
if (state.silentDriftCounter >= SILENT_DRIFT_THRESHOLD && !state.silentDriftEmitted) {
|
||||
emit({ ts, session, cwd, type: "silent_drift", read_count: state.silentDriftCounter, last_tool: tool });
|
||||
state.silentDriftEmitted = true;
|
||||
}
|
||||
} else {
|
||||
state.silentDriftCounter = 0;
|
||||
state.silentDriftEmitted = false;
|
||||
}
|
||||
|
||||
if (tool === "Agent") {
|
||||
const subagent = (input.tool_input && (input.tool_input.subagent_type || input.tool_input.agent)) || "unknown";
|
||||
const recent = state.tool_window.slice(-5).filter(e => e.tool === "Agent" && e.subagent === subagent).length;
|
||||
@@ -423,6 +452,23 @@ function main() {
|
||||
const fp = errorFingerprint(input.tool_response);
|
||||
if (fp) {
|
||||
bumpUsage("payload:tool_response_error_seen");
|
||||
if (state.recentRecoveries.length) {
|
||||
const keep = [];
|
||||
for (const rec of state.recentRecoveries) {
|
||||
const tools_since = state.session_post_count - rec.emitted_at_count;
|
||||
if (tools_since > ERROR_AFTER_RECOVERY_WINDOW) continue;
|
||||
if (Array.isArray(rec.fps) && rec.fps.includes(fp)) {
|
||||
emit({
|
||||
ts, session, cwd, type: "error_after_recovery",
|
||||
recovered_from: rec.recovered_from, original_fp: fp,
|
||||
tools_since_recovery: tools_since,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
keep.push(rec);
|
||||
}
|
||||
state.recentRecoveries = keep;
|
||||
}
|
||||
state.last_errors.push({ tool, fp });
|
||||
if (state.last_errors.length > ERROR_RING_SIZE) state.last_errors.shift();
|
||||
const sameError = state.last_errors.filter(e => e.fp === fp).length;
|
||||
@@ -468,7 +514,13 @@ function main() {
|
||||
state.task_tool_kinds[tool] = (state.task_tool_kinds[tool] || 0) + 1;
|
||||
|
||||
if (struggleEmittedThisTurn) {
|
||||
state.recoveryWatch = { recovered_from: struggleEmittedThisTurn, since_ts: ts, clean_count: 0, window_tools: [] };
|
||||
state.recoveryWatch = {
|
||||
recovered_from: struggleEmittedThisTurn,
|
||||
since_ts: ts,
|
||||
clean_count: 0,
|
||||
window_tools: [],
|
||||
watched_fps: state.last_errors.map(e => e.fp),
|
||||
};
|
||||
} else if (state.recoveryWatch) {
|
||||
const turnHadError = fp !== null;
|
||||
if (turnHadError) {
|
||||
@@ -485,6 +537,12 @@ function main() {
|
||||
active_skills: activeNames(state, "skill"),
|
||||
active_agents: activeNames(state, "agent"),
|
||||
});
|
||||
state.recentRecoveries.push({
|
||||
recovered_from: state.recoveryWatch.recovered_from,
|
||||
fps: state.recoveryWatch.watched_fps || [],
|
||||
emitted_at_count: state.session_post_count,
|
||||
});
|
||||
if (state.recentRecoveries.length > RECENT_RECOVERIES_MAX) state.recentRecoveries.shift();
|
||||
state.recoveryWatch = null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user