mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-22 02:01:44 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7ddda26bb4 |
@@ -364,6 +364,48 @@ for i in 1 2 3 4 5; do
|
|||||||
done
|
done
|
||||||
assert_grep "$ROOT/journal.jsonl" '"active_skills":\["caveman"\]' "active_skills payload includes invoked skill"
|
assert_grep "$ROOT/journal.jsonl" '"active_skills":\["caveman"\]' "active_skills payload includes invoked skill"
|
||||||
|
|
||||||
|
# --- Test 24: task_completed fires on diverse multi-tool task ---
|
||||||
|
echo "Test 24: task_completed after 5 tools / 3 kinds / no corrections"
|
||||||
|
reset_state
|
||||||
|
for kind in Bash Read Edit Write Grep; do
|
||||||
|
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"$kind\",\"tool_input\":{},\"session_id\":\"sT\",\"cwd\":\"/tmp/x\"}" \
|
||||||
|
| HOOK_RUN >/dev/null 2>&1 || true
|
||||||
|
done
|
||||||
|
echo '{"hook_event_name":"UserPromptSubmit","prompt":"go on","session_id":"sT","cwd":"/tmp/x"}' \
|
||||||
|
| HOOK_RUN >/dev/null 2>&1 || true
|
||||||
|
assert_grep "$ROOT/journal.jsonl" '"type":"task_completed"' "5 tools + 5 kinds + 0 corrections emits task_completed"
|
||||||
|
|
||||||
|
# --- Test 25: task_completed suppressed when tool diversity < 3 ---
|
||||||
|
echo "Test 25: task_completed suppressed on single-tool run"
|
||||||
|
reset_state
|
||||||
|
for i in 1 2 3 4 5; do
|
||||||
|
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"Edit\",\"tool_input\":{\"file_path\":\"/tmp/$i\"},\"session_id\":\"sT2\",\"cwd\":\"/tmp/x\"}" \
|
||||||
|
| HOOK_RUN >/dev/null 2>&1 || true
|
||||||
|
done
|
||||||
|
echo '{"hook_event_name":"UserPromptSubmit","prompt":"go on","session_id":"sT2","cwd":"/tmp/x"}' \
|
||||||
|
| HOOK_RUN >/dev/null 2>&1 || true
|
||||||
|
if grep -qE '"type":"task_completed"' "$ROOT/journal.jsonl"; then
|
||||||
|
echo " FAIL: task_completed fired on single-tool task"; FAIL=$((FAIL+1))
|
||||||
|
else
|
||||||
|
echo " PASS: task_completed suppressed (low tool diversity)"; PASS=$((PASS+1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Test 26: task_completed suppressed when the closing prompt is a correction ---
|
||||||
|
echo "Test 26: task_completed suppressed after correction"
|
||||||
|
reset_state
|
||||||
|
for kind in Bash Read Edit Write Grep; do
|
||||||
|
echo "{\"hook_event_name\":\"PostToolUse\",\"tool_name\":\"$kind\",\"tool_input\":{},\"session_id\":\"sT3\",\"cwd\":\"/tmp/x\"}" \
|
||||||
|
| HOOK_RUN >/dev/null 2>&1 || true
|
||||||
|
done
|
||||||
|
# Correction phrase increments task_corrections inside the same UserPromptSubmit cycle, before the prior task is evaluated, so that run is disqualified.
|
||||||
|
echo '{"hook_event_name":"UserPromptSubmit","prompt":"no, undo that","session_id":"sT3","cwd":"/tmp/x"}' \
|
||||||
|
| HOOK_RUN >/dev/null 2>&1 || true
|
||||||
|
if grep -qE '"type":"task_completed"' "$ROOT/journal.jsonl"; then
|
||||||
|
echo " FAIL: task_completed fired despite correction on the closing prompt"; FAIL=$((FAIL+1))
|
||||||
|
else
|
||||||
|
echo " PASS: task_completed suppressed by correction"; PASS=$((PASS+1))
|
||||||
|
fi
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "Results: $PASS passed, $FAIL failed"
|
echo "Results: $PASS passed, $FAIL failed"
|
||||||
[ "$FAIL" = "0" ]
|
[ "$FAIL" = "0" ]
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ The hook emits these `type` values into the journal:
|
|||||||
| `subagent_dispatch_pattern` | same subagent dispatched ≥3× cumulatively | subagent_type |
|
| `subagent_dispatch_pattern` | same subagent dispatched ≥3× cumulatively | subagent_type |
|
||||||
| `correction_free_streak` | 5 clean UserPromptSubmits in a row (no correction phrase) | `active_skills[0]` |
|
| `correction_free_streak` | 5 clean UserPromptSubmits in a row (no correction phrase) | `active_skills[0]` |
|
||||||
| `clean_recovery` | 3 clean PostToolUse events after a `tool_error_loop`/`dead_end`/`retry_loop` | (`recovered_from`, `active_skills[0]`) |
|
| `clean_recovery` | 3 clean PostToolUse events after a `tool_error_loop`/`dead_end`/`retry_loop` | (`recovered_from`, `active_skills[0]`) |
|
||||||
|
| `task_completed` | UserPromptSubmit closes a run of ≥5 tool calls with ≥3 distinct tool kinds and 0 corrections | sorted `tool_kinds` tuple |
|
||||||
|
|
||||||
## Process
|
## Process
|
||||||
|
|
||||||
@@ -62,6 +63,7 @@ The hook emits these `type` values into the journal:
|
|||||||
- `subagent_dispatch_pattern`: cluster by `subagent_type`.
|
- `subagent_dispatch_pattern`: cluster by `subagent_type`.
|
||||||
- `correction_free_streak`: cluster by `active_skills[0]`. Treat ≥3 streaks across ≥2 sessions naming the same skill as cross-session evidence.
|
- `correction_free_streak`: cluster by `active_skills[0]`. Treat ≥3 streaks across ≥2 sessions naming the same skill as cross-session evidence.
|
||||||
- `clean_recovery`: cluster by (`recovered_from`, `active_skills[0]`). A win cluster qualifies for `skill_edit` only when the named skill exists in `skills_root`.
|
- `clean_recovery`: cluster by (`recovered_from`, `active_skills[0]`). A win cluster qualifies for `skill_edit` only when the named skill exists in `skills_root`.
|
||||||
|
- `task_completed`: cluster by the sorted `tool_kinds` tuple (the multi-tool recipe); sort the array yourself before comparing, because the hook writes `tool_kinds` in the order the kinds were first seen. A single entry qualifies for a `skill_new` proposal (drafting protocol applies). Cross-session evidence requires ≥2 entries from distinct sessions with the same tuple — without it, the proposal is queued and never auto-applied. Run the existing skill-overlap rule before drafting: if the recipe matches an existing skill's name/description tokens, route to `skill_edit` instead.
|
||||||
5. **Multi-axis correlation**: for each session that produced ≥2 distinct struggle types (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`), tag clusters from that session as `multi_axis: true`. This grants +1 confidence at scoring.
|
5. **Multi-axis correlation**: for each session that produced ≥2 distinct struggle types (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`), tag clusters from that session as `multi_axis: true`. This grants +1 confidence at scoring.
|
||||||
6. For each cluster qualifying under the rubric — ≥3 occurrences across ≥2 sessions, OR (for struggle types) ≥1 entry within a single session, OR (for `correction`) ≥3 occurrences across ≥2 cwds:
|
6. For each cluster qualifying under the rubric — ≥3 occurrences across ≥2 sessions, OR (for struggle types) ≥1 entry within a single session, OR (for `correction`) ≥3 occurrences across ≥2 cwds:
|
||||||
a. If cluster topic matches a rejected idea via the rejected-ideas fuzzy set (≥2 token overlap with rejection's `# Why`), skip with reason `"rejected-similar"`.
|
a. If cluster topic matches a rejected idea via the rejected-ideas fuzzy set (≥2 token overlap with rejection's `# Why`), skip with reason `"rejected-similar"`.
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ const CORRECTION_FREE_THRESHOLD = 5;
|
|||||||
const CLEAN_RECOVERY_WINDOW = 3;
|
const CLEAN_RECOVERY_WINDOW = 3;
|
||||||
const STRUGGLE_TYPES = new Set(["tool_error_loop", "dead_end", "retry_loop"]);
|
const STRUGGLE_TYPES = new Set(["tool_error_loop", "dead_end", "retry_loop"]);
|
||||||
const ACTIVE_SKILLS_LOOKBACK = 10;
|
const ACTIVE_SKILLS_LOOKBACK = 10;
|
||||||
|
const TASK_TOOL_MIN = 5;
|
||||||
|
const TASK_DIVERSITY_MIN = 3;
|
||||||
const STATE_MAX_BYTES = 1_000_000;
|
const STATE_MAX_BYTES = 1_000_000;
|
||||||
|
|
||||||
function safeRead(path, fallback) {
|
function safeRead(path, fallback) {
|
||||||
@@ -133,6 +135,9 @@ function resetSessionLocal(state) {
|
|||||||
state.correctionFreeCounter = 0;
|
state.correctionFreeCounter = 0;
|
||||||
state.recoveryWatch = null;
|
state.recoveryWatch = null;
|
||||||
state.tool_window = [];
|
state.tool_window = [];
|
||||||
|
state.task_tool_kinds = {};
|
||||||
|
state.task_tool_count = 0;
|
||||||
|
state.task_corrections = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
function ensureStateDefaults(state) {
|
function ensureStateDefaults(state) {
|
||||||
@@ -149,6 +154,9 @@ function ensureStateDefaults(state) {
|
|||||||
if (typeof state.correctionFreeCounter !== "number") state.correctionFreeCounter = 0;
|
if (typeof state.correctionFreeCounter !== "number") state.correctionFreeCounter = 0;
|
||||||
if (state.recoveryWatch === undefined) state.recoveryWatch = null;
|
if (state.recoveryWatch === undefined) state.recoveryWatch = null;
|
||||||
if (!Array.isArray(state.activity_ring)) state.activity_ring = [];
|
if (!Array.isArray(state.activity_ring)) state.activity_ring = [];
|
||||||
|
if (!state.task_tool_kinds || typeof state.task_tool_kinds !== "object") state.task_tool_kinds = {};
|
||||||
|
if (typeof state.task_tool_count !== "number") state.task_tool_count = 0;
|
||||||
|
if (typeof state.task_corrections !== "number") state.task_corrections = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
@@ -178,6 +186,7 @@ function main() {
|
|||||||
prev_file: last.file || null,
|
prev_file: last.file || null,
|
||||||
});
|
});
|
||||||
state.correctionFreeCounter = 0;
|
state.correctionFreeCounter = 0;
|
||||||
|
state.task_corrections += 1;
|
||||||
} else {
|
} else {
|
||||||
state.correctionFreeCounter += 1;
|
state.correctionFreeCounter += 1;
|
||||||
if (state.correctionFreeCounter >= CORRECTION_FREE_THRESHOLD) {
|
if (state.correctionFreeCounter >= CORRECTION_FREE_THRESHOLD) {
|
||||||
@@ -190,6 +199,22 @@ function main() {
|
|||||||
state.correctionFreeCounter = 0;
|
state.correctionFreeCounter = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Evaluate prior task (work between previous UserPromptSubmit and this one).
|
||||||
|
const taskKinds = Object.keys(state.task_tool_kinds);
|
||||||
|
if (state.task_tool_count >= TASK_TOOL_MIN &&
|
||||||
|
taskKinds.length >= TASK_DIVERSITY_MIN &&
|
||||||
|
state.task_corrections === 0) {
|
||||||
|
appendJournal({
|
||||||
|
ts, session, cwd, type: "task_completed",
|
||||||
|
tool_count: state.task_tool_count,
|
||||||
|
tool_kinds: taskKinds,
|
||||||
|
active_skills: activeNames(state, "skill"),
|
||||||
|
active_agents: activeNames(state, "agent"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
state.task_tool_kinds = {};
|
||||||
|
state.task_tool_count = 0;
|
||||||
|
state.task_corrections = 0;
|
||||||
resetFrictionCounters(state);
|
resetFrictionCounters(state);
|
||||||
} else if (event === "PreToolUse") {
|
} else if (event === "PreToolUse") {
|
||||||
const tool = input.tool_name;
|
const tool = input.tool_name;
|
||||||
@@ -293,6 +318,9 @@ function main() {
|
|||||||
state.dead_end_emitted = true;
|
state.dead_end_emitted = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
state.task_tool_count += 1;
|
||||||
|
state.task_tool_kinds[tool] = (state.task_tool_kinds[tool] || 0) + 1;
|
||||||
|
|
||||||
if (struggleEmittedThisTurn) {
|
if (struggleEmittedThisTurn) {
|
||||||
state.recoveryWatch = { recovered_from: struggleEmittedThisTurn, since_ts: ts, clean_count: 0, window_tools: [] };
|
state.recoveryWatch = { recovered_from: struggleEmittedThisTurn, since_ts: ts, clean_count: 0, window_tools: [] };
|
||||||
} else if (state.recoveryWatch) {
|
} else if (state.recoveryWatch) {
|
||||||
|
|||||||
Reference in New Issue
Block a user