mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-11 23:29:35 +00:00
feat(adam): smarter signals & clustering
- New signal types in hooks/adam-observe.mjs: - silent_drift: 5 consecutive read-only PostToolUse without an action tool - error_after_recovery: same error fingerprint returns within 5 events of clean_recovery - Severity-weighted scoring in adam/scripts/adam-score.mjs: - SEVERITY_DIVISORS exported per struggle signal type - Per-session severity_sum + severity_by_type added to JSON output - Skill-attribution clustering in agents/adam.md: - Sub-cluster struggle signals on active_skills[0] - New struggle-driven skill_edit variant (always queues, never auto-applies) - Rubric updates: - +1 for cluster severity-sum >= 10, additional +1 for >= 32 - +1 for skill-attributed sub-cluster naming an existing skill - silent_drift + error_after_recovery added to struggle signal list - Window: silent_drift 14d, error_after_recovery 30d - Tests: 94 passing (78-82 new) Backward compat: entries without count default to severity 1. Existing win-driven skill_edit gate untouched. No journal migration.
This commit is contained in:
@@ -43,10 +43,32 @@ export const NEGATIVE_SIGNAL_TYPES = new Set([
|
||||
"retry_loop",
|
||||
"build_loop",
|
||||
"weak_agent",
|
||||
"silent_drift",
|
||||
"error_after_recovery",
|
||||
]);
|
||||
|
||||
export const REINFORCEMENT_THRESHOLD = 3;
|
||||
|
||||
// Severity divisor per struggle signal type. Severity = max(1, floor(count / divisor)).
|
||||
// Entries without `count` default to severity 1. Source of truth — referenced by
|
||||
// agents/adam.md (Confidence rubric → severity-sum bullets).
|
||||
export const SEVERITY_DIVISORS = {
|
||||
dead_end: 8,
|
||||
edit_churn: 4,
|
||||
tool_error_loop: 3,
|
||||
retry_loop: 3,
|
||||
weak_agent: 2,
|
||||
build_loop: 1,
|
||||
};
|
||||
|
||||
export function entrySeverity(entry) {
|
||||
if (!entry || typeof entry !== "object") return 1;
|
||||
const divisor = SEVERITY_DIVISORS[entry.type];
|
||||
if (!divisor) return 1;
|
||||
const count = typeof entry.count === "number" && entry.count > 0 ? entry.count : 1;
|
||||
return Math.max(1, Math.floor(count / divisor));
|
||||
}
|
||||
|
||||
function parseArgs(argv) {
|
||||
const args = { home: null, input: null, help: false };
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
@@ -84,11 +106,22 @@ export function computeSessionScores(entries) {
|
||||
const sid = e.session || e.session_id || "";
|
||||
if (!sid) continue;
|
||||
if (!bySession.has(sid)) {
|
||||
bySession.set(sid, { session_id: sid, negative_count: 0, task_completed_count: 0 });
|
||||
bySession.set(sid, {
|
||||
session_id: sid,
|
||||
negative_count: 0,
|
||||
task_completed_count: 0,
|
||||
severity_sum: 0,
|
||||
severity_by_type: {},
|
||||
});
|
||||
}
|
||||
const slot = bySession.get(sid);
|
||||
if (e.type === "task_completed") slot.task_completed_count++;
|
||||
else if (NEGATIVE_SIGNAL_TYPES.has(e.type)) slot.negative_count++;
|
||||
else if (NEGATIVE_SIGNAL_TYPES.has(e.type)) {
|
||||
slot.negative_count++;
|
||||
const sev = entrySeverity(e);
|
||||
slot.severity_sum += sev;
|
||||
slot.severity_by_type[e.type] = (slot.severity_by_type[e.type] || 0) + sev;
|
||||
}
|
||||
}
|
||||
const out = [];
|
||||
for (const slot of bySession.values()) {
|
||||
|
||||
@@ -29,6 +29,8 @@ export const SIGNAL_WINDOWS_DAYS = {
|
||||
build_loop: 30,
|
||||
weak_agent: 30,
|
||||
subagent_dispatch_pattern: 30,
|
||||
silent_drift: 14,
|
||||
error_after_recovery: 30,
|
||||
correction_free_streak: 60,
|
||||
clean_recovery: 60,
|
||||
task_completed: 60,
|
||||
|
||||
@@ -1388,6 +1388,105 @@ else
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Test 78: silent_drift fires after 5 consecutive read-only tools ---
|
||||
echo "Test 78: silent_drift after 5 reads"
|
||||
reset_state
|
||||
for i in 1 2 3 4 5; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/r-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sSD\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
assert_grep "$ROOT/journal.jsonl" '"type":"silent_drift"' "5 consecutive reads emit silent_drift"
|
||||
assert_grep "$ROOT/journal.jsonl" '"read_count":5' "silent_drift entry records read_count"
|
||||
|
||||
# --- Test 79: silent_drift counter resets on action tool ---
|
||||
echo "Test 79: silent_drift counter resets on action tool"
|
||||
reset_state
|
||||
for i in 1 2 3 4; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/r-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sSDR\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# Action tool — should reset
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Edit","tool_input":{"file_path":"/tmp/x"},"tool_response":{"content":"ok"},"session_id":"sSDR","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
for i in 1 2 3 4; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/rb-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sSDR\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
if grep -qE '"type":"silent_drift"' "$ROOT/journal.jsonl"; then
|
||||
echo " FAIL: silent_drift fired despite action tool reset"; FAIL=$((FAIL+1))
|
||||
else
|
||||
echo " PASS: silent_drift suppressed by intervening action tool"; PASS=$((PASS+1))
|
||||
fi
|
||||
|
||||
# --- Test 80: error_after_recovery fires when same fp returns post-clean_recovery ---
|
||||
echo "Test 80: error_after_recovery fires when fp returns after recovery"
|
||||
reset_state
|
||||
# Build a tool_error_loop with ENOENT
|
||||
for i in 1 2 3; do
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat missing"},"tool_response":{"is_error":true,"content":"cat: missing: No such file or directory"},"session_id":"sEAR","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# 3 clean tools → clean_recovery
|
||||
for i in 1 2 3; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/ok-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sEAR\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# Same fp returns within window
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat other"},"tool_response":{"is_error":true,"content":"cat: other: No such file or directory"},"session_id":"sEAR","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
assert_grep "$ROOT/journal.jsonl" '"type":"error_after_recovery"' "same fp after clean_recovery emits error_after_recovery"
|
||||
|
||||
# --- Test 81: error_after_recovery does NOT fire after window expires ---
|
||||
echo "Test 81: error_after_recovery suppressed beyond window"
|
||||
reset_state
|
||||
for i in 1 2 3; do
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat missing"},"tool_response":{"is_error":true,"content":"cat: missing: No such file or directory"},"session_id":"sEARW","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
for i in 1 2 3; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/ok-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sEARW\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
# UserPromptSubmit resets tools_since_user + last_errors so the burn reads don't
|
||||
# trigger a secondary dead_end + clean_recovery cycle (which would create a fresh
|
||||
# recovery within window and cause error_after_recovery to fire legitimately).
|
||||
echo '{"hook_event_name":"UserPromptSubmit","prompt":"keep going","session_id":"sEARW","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
# Burn through the 5-event window with 6 clean reads (session_post_count: 6 → 12)
|
||||
for i in 1 2 3 4 5 6; do
|
||||
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Read\",\"tool_input\":{\"file_path\":\"/tmp/burn-$i\"},\"tool_response\":{\"content\":\"ok\"},\"session_id\":\"sEARW\",\"cwd\":\"/tmp/x\"}" \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
done
|
||||
echo '{"hook_event_name":"PostToolUse","tool_name":"Bash","tool_input":{"command":"cat other"},"tool_response":{"is_error":true,"content":"cat: other: No such file or directory"},"session_id":"sEARW","cwd":"/tmp/x"}' \
|
||||
| HOOK_RUN >/dev/null 2>&1 || true
|
||||
if grep -qE '"type":"error_after_recovery"' "$ROOT/journal.jsonl"; then
|
||||
echo " FAIL: error_after_recovery fired outside 5-event window"; FAIL=$((FAIL+1))
|
||||
else
|
||||
echo " PASS: error_after_recovery suppressed outside window"; PASS=$((PASS+1))
|
||||
fi
|
||||
|
||||
# --- Test 82: adam-score.mjs reports severity_sum + severity_by_type ---
|
||||
echo "Test 82: severity-sum reporting in score.mjs"
|
||||
SEV_TMP="$(mktemp)"
|
||||
cat > "$SEV_TMP" <<'EOF'
|
||||
{"ts":"2026-05-12T10:00:00Z","session":"sSEV","type":"dead_end","count":64}
|
||||
{"ts":"2026-05-12T10:01:00Z","session":"sSEV","type":"edit_churn","count":8}
|
||||
{"ts":"2026-05-12T10:02:00Z","session":"sSEV","type":"tool_error_loop","count":3,"fp":"ENOENT:abc"}
|
||||
EOF
|
||||
out=$(SCORE_RUN --input "$SEV_TMP" 2>/dev/null)
|
||||
rm -f "$SEV_TMP"
|
||||
# Expected: dead_end 64/8=8, edit_churn 8/4=2, tool_error_loop 3/3=1 → sum=11
|
||||
if echo "$out" | grep -q '"severity_sum":11'; then
|
||||
echo " PASS: severity_sum=11 reported"; PASS=$((PASS+1))
|
||||
else
|
||||
echo " FAIL: severity_sum mismatch (got: $out)"; FAIL=$((FAIL+1))
|
||||
fi
|
||||
if echo "$out" | grep -q '"dead_end":8'; then
|
||||
echo " PASS: severity_by_type.dead_end=8"; PASS=$((PASS+1))
|
||||
else
|
||||
echo " FAIL: severity_by_type.dead_end missing/wrong (got: $out)"; FAIL=$((FAIL+1))
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "Results: $PASS passed, $FAIL failed"
|
||||
[ "$FAIL" = "0" ]
|
||||
|
||||
Reference in New Issue
Block a user