mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-05 22:49:28 +00:00
012c40b9ab
Storage/window/exclusion split (#7): ISO-week journal rotation with safety fuse replaces size-based rotation (fixes silent under-counting when clusters straddle boundaries). Per-signal sliding windows via adam-window.mjs guard against stale signal accumulation. Legacy YYYY-MM-DD-<ts>.jsonl files remain readable. Error fingerprint normalization (#3): adam-observe.mjs extracts canonical error codes (ENOENT, ECONNREFUSED, etc.) and normalizes paths/timestamps/hex before hashing. 'Connection refused' and 'ECONNREFUSED' now cluster identically. Correction corpus expansion (#1): strong tokens (stop, wrong, undo, try again, different approach, etc.) fire on any occurrence. Weak tokens (no, actually, wait) require negation/contrast co-occurrence within 8 tokens. Kills the 'actually, I think...' false positive. Analyst observability (#6): mandatory clustering trace block; adam-explain.mjs parses to summary/full/json. Cluster decisions now surface rejection reasons (threshold, contradiction, window). Persisted to ~/.claude/adam/last-trace.txt. Dead_end nudge proposal type (#2): single-session auto-apply gate (>=3 dead_end events). Action appends to active-nudges.json, surfaced via adam-nudge.mjs at next SessionStart. Lower blast than skill_edit. Per-(skill, fingerprint) cooldown (#4): adam-cooldown.mjs replaces coarse per-skill check. proposal_fingerprint = djb2(skill_slug + cluster_id + normalized_diff_body). Legacy applied/rejected records gate via 'legacy' fingerprint fallback through resolveSkill helper (handles target_skill, skill, or target: <path>). task_completed scoring integration (#8): adam-score.mjs computes per-session urgency dampener (3 task_completed -> 0.5) and reinforcement candidates (skills cited in >=3 clean completions). New 'reinforcement' proposal type appends to reinforcements.jsonl on apply (no code/memory mutation). A/B effectiveness measurement (#5): every auto-applied edit appends to ab-tracking.jsonl. 
adam-ab-measure.mjs computes 7d pre/post signal-count delta per entry (improved / neutral / regressed / no_baseline / pending). Analyst surfaces regressions at top of /reflect output. Upgrade UX overhaul (#9): adam-upgrade.mjs implements --list/--diff/--accept/--accept-all. SessionStart nudge prints pending-merge warning when .adam-new files exist (latency ~20ms via fixed shortlist). install.sh emits unmissable final-message hint after creating any .adam-new file. Simplify pass: adam-utils.mjs deduplicates readJsonlSafe / listJsonlFiles / parseFrontmatter across 8 scripts. Net -46 LOC. Test coverage: 30 -> 87 tests. Every new feature has feature-validating assertions (false-case coverage included). T77 statically verifies install.sh references every adam-*.mjs source script (would have caught the missing adam-utils inclusion that review #2 surfaced).
155 lines
5.9 KiB
JavaScript
Executable File
155 lines
5.9 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
// Test driver for ~/.claude/hooks/adam-observe.mjs.
|
|
// Usage: node test-hook.mjs (runs all tests in this file).
|
|
// Spawns the hook with synthesized stdin in a tmp HOME, asserts journal contents.
|
|
import { spawnSync } from "node:child_process";
|
|
import { mkdtempSync, mkdirSync, readFileSync, existsSync, rmSync } from "node:fs";
|
|
import { tmpdir } from "node:os";
|
|
import { join } from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
|
|
// Filesystem path of the hook script under test. It is resolved against this
// module's own URL, so it does not depend on the current working directory.
const HOOK = join(
  fileURLToPath(
    new URL("../../hooks/adam-observe.mjs", import.meta.url),
  ),
);
|
|
|
|
// Create a fresh, uniquely named directory under the OS temp dir (prefix
// "adam-test-") to act as HOME for one test, pre-create its .claude/adam
// subdirectory, and return the directory's path.
export function newTmpHome() {
  const prefix = join(tmpdir(), "adam-test-");
  const home = mkdtempSync(prefix);
  const adamDir = join(home, ".claude/adam");
  mkdirSync(adamDir, { recursive: true });
  return home;
}
|
|
|
|
// Run the hook once as a child `node` process.
//
//   home  - directory exported to the child as HOME (the rest of the parent
//           environment is passed through unchanged).
//   input - value serialized with JSON.stringify and written to the child's
//           stdin.
//
// Returns the spawnSync result object (stdout/stderr decoded as UTF-8).
// Throws an Error when the child could not be run, was killed, or exited with
// a non-zero status.
export function feed(home, input) {
  const r = spawnSync("node", [HOOK], {
    input: JSON.stringify(input),
    env: { ...process.env, HOME: home },
    encoding: "utf8",
    timeout: 5000,
  });

  // spawnSync does not throw on spawn failure or timeout; it reports them via
  // `error` and leaves `status` null. Surface that cause instead of the
  // uninformative "hook exit null: null".
  if (r.error) {
    throw new Error(`hook failed to run: ${r.error.message}`, { cause: r.error });
  }
  // A null status without an error means the child was terminated by a signal.
  if (r.status === null) {
    throw new Error(`hook killed by ${r.signal}: ${r.stderr}`);
  }
  if (r.status !== 0) throw new Error(`hook exit ${r.status}: ${r.stderr}`);
  return r;
}
|
|
|
|
// Load <home>/.claude/adam/journal.jsonl as an array of parsed objects, one per
// non-empty line. Returns [] when the file does not exist. A line that is not
// valid JSON makes JSON.parse throw.
export function readJournal(home) {
  const journalPath = join(home, ".claude/adam/journal.jsonl");
  if (!existsSync(journalPath)) return [];

  const lines = readFileSync(journalPath, "utf8").trim().split("\n");
  const entries = [];
  for (const line of lines) {
    // Skip empty lines (also covers an empty file, which splits to [""]).
    if (line) entries.push(JSON.parse(line));
  }
  return entries;
}
|
|
|
|
// Minimal assertion helper for this driver: prints "ok: <msg>" to stdout when
// cond is truthy; otherwise prints "FAIL: <msg>" to stderr and terminates the
// process with exit code 1.
export function assert(cond, msg) {
  if (cond) {
    console.log(`ok: ${msg}`);
    return;
  }
  console.error(`FAIL: ${msg}`);
  process.exit(1);
}
|
|
|
|
// Recursively delete a temporary HOME directory. Best-effort: any error from
// rmSync is deliberately ignored so cleanup never masks a test result.
export function cleanup(home) {
  const removal = { recursive: true, force: true };
  try {
    rmSync(home, removal);
  } catch {
    // Intentionally ignored.
  }
}
|
|
|
|
// Tests below this line — added by subsequent tasks.
|
|
|
|
// Feeds five UserPromptSubmit events for session "s1" and expects the journal
// to contain exactly one correction_free_streak entry with streak 5 and
// session "s1".
function testCorrectionFreeStreak() {
  const home = newTmpHome();
  try {
    let sent = 0;
    while (sent < 5) {
      const payload = {
        hook_event_name: "UserPromptSubmit",
        session_id: "s1",
        cwd: "/x",
        prompt: `please continue with the work item ${sent}`,
      };
      feed(home, payload);
      sent += 1;
    }

    const streakEvents = readJournal(home).filter(
      (entry) => entry.type === "correction_free_streak",
    );
    assert(streakEvents.length === 1, "exactly one correction_free_streak after 5 clean prompts");

    const onlyStreak = streakEvents[0];
    assert(onlyStreak.streak === 5, "streak field is 5");
    assert(onlyStreak.session === "s1", "session id captured");
  } finally {
    cleanup(home);
  }
}
|
|
|
|
// Feeds four prompts in session "s1" followed by one in session "s2" and
// expects no correction_free_streak entry: the per-session counter is expected
// to restart when the session id changes, so it never reaches the threshold.
function testStreakResetsOnSessionChange() {
  const home = newTmpHome();
  try {
    // 4 in s1 (counter=4, no streak yet), then 1 in s2 (counter must reset → 1, no streak)
    const sessionOrder = ["s1", "s1", "s1", "s1", "s2"];
    for (const sessionId of sessionOrder) {
      feed(home, {
        hook_event_name: "UserPromptSubmit",
        session_id: sessionId,
        cwd: "/x",
        prompt: "ok",
      });
    }

    const streakCount = readJournal(home)
      .filter((entry) => entry.type === "correction_free_streak")
      .length;
    assert(streakCount === 0, "no streak when session changes mid-streak");
  } finally {
    cleanup(home);
  }
}
|
|
|
|
// Feeds three failing Bash PostToolUse events followed by three successful
// Read PostToolUse events, then expects exactly one clean_recovery journal
// entry whose recovered_from is "tool_error_loop".
function testCleanRecovery() {
  const home = newTmpHome();
  const common = { hook_event_name: "PostToolUse", session_id: "s1", cwd: "/x" };
  try {
    // Trigger tool_error_loop: 3 PostToolUse with same error fingerprint.
    [0, 1, 2].forEach((n) => {
      feed(home, {
        ...common,
        tool_name: "Bash",
        tool_input: { command: `echo ${n}` },
        tool_response: { is_error: true, content: "error: command not found" },
      });
    });

    // Then 3 clean PostToolUse events.
    [0, 1, 2].forEach((n) => {
      feed(home, {
        ...common,
        tool_name: "Read",
        tool_input: { file_path: `/tmp/ok-${n}` },
        tool_response: { content: "fine" },
      });
    });

    const recoveries = readJournal(home).filter((entry) => entry.type === "clean_recovery");
    assert(recoveries.length === 1, "one clean_recovery emitted after 3 clean tools post-struggle");
    assert(recoveries[0].recovered_from === "tool_error_loop", "recovered_from set");
  } finally {
    cleanup(home);
  }
}
|
|
|
|
// Feeds three failing Bash events, then two clean Reads, another failing Bash,
// and one more clean Read. Expects no clean_recovery entry, because the clean
// run is interrupted by an error before reaching three in a row.
function testRecoveryResetsOnError() {
  const home = newTmpHome();

  // Send one PostToolUse event for session "s1" with the given tool fields.
  const post = (tool_name, tool_input, tool_response) =>
    feed(home, {
      hook_event_name: "PostToolUse",
      session_id: "s1",
      cwd: "/x",
      tool_name,
      tool_input,
      tool_response,
    });

  try {
    for (const n of [0, 1, 2]) {
      post("Bash", { command: `cmd ${n}` }, { is_error: true, content: "error: failed" });
    }

    post("Read", { file_path: "/tmp/a" }, { content: "ok" });
    post("Read", { file_path: "/tmp/b" }, { content: "ok" });
    post("Bash", { command: "x" }, { is_error: true, content: "error: again" });
    post("Read", { file_path: "/tmp/c" }, { content: "ok" });

    const recoveryCount = readJournal(home)
      .filter((entry) => entry.type === "clean_recovery")
      .length;
    assert(recoveryCount === 0, "no clean_recovery when error breaks the streak");
  } finally {
    cleanup(home);
  }
}
|
|
|
|
// Feeds a PreToolUse event for the Skill tool (skill "my-skill") followed by
// five clean prompts, then expects the resulting correction_free_streak entry
// to carry an active_skills array that includes "my-skill".
function testActiveSkillsPayload() {
  const home = newTmpHome();
  try {
    const skillInvocation = {
      hook_event_name: "PreToolUse",
      session_id: "s1",
      cwd: "/x",
      tool_name: "Skill",
      tool_input: { skill: "my-skill" },
    };
    feed(home, skillInvocation);

    let remaining = 5;
    while (remaining > 0) {
      feed(home, {
        hook_event_name: "UserPromptSubmit",
        session_id: "s1",
        cwd: "/x",
        prompt: "ok",
      });
      remaining -= 1;
    }

    const streak = readJournal(home).find((entry) => entry.type === "correction_free_streak");
    assert(
      streak && Array.isArray(streak.active_skills) && streak.active_skills.includes("my-skill"),
      "correction_free_streak payload includes active skill",
    );
  } finally {
    cleanup(home);
  }
}
|
|
|
|
// Run every test in this file in a fixed order, then print a final success
// line. A failing assertion exits the process before that line is reached.
async function main() {
  const suite = [
    testCorrectionFreeStreak,
    testStreakResetsOnSessionChange,
    testCleanRecovery,
    testRecoveryResetsOnError,
    testActiveSkillsPayload,
  ];
  for (const runTest of suite) {
    runTest();
  }
  console.log("all tests passed");
}
|
|
|
|
// Run the suite only when this file is executed directly, not when it is
// imported for its helpers. Compare filesystem paths instead of hand-building
// a file:// URL: import.meta.url is percent-encoded, so the previous string
// comparison was false for paths containing spaces or other escaped characters
// (and on Windows), which silently skipped every test.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  // main() is async; report a rejection explicitly and fail the run.
  main().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
|