mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-26 02:24:12 +00:00
chore(v0.3.3): analyst observability, A/B measurement, journal hygiene
Storage/window/exclusion split (#7): ISO-week journal rotation with safety fuse replaces size-based rotation (fixes silent under-counting when clusters straddle boundaries). Per-signal sliding windows via adam-window.mjs guard against stale signal accumulation. Legacy YYYY-MM-DD-<ts>.jsonl files remain readable. Error fingerprint normalization (#3): adam-observe.mjs extracts canonical error codes (ENOENT, ECONNREFUSED, etc.) and normalizes paths/timestamps/hex before hashing. 'Connection refused' and 'ECONNREFUSED' now cluster identically. Correction corpus expansion (#1): strong tokens (stop, wrong, undo, try again, different approach, etc.) fire on any occurrence. Weak tokens (no, actually, wait) require negation/contrast co-occurrence within 8 tokens. Kills the 'actually, I think...' false positive. Analyst observability (#6): mandatory clustering trace block; adam-explain.mjs parses to summary/full/json. Cluster decisions now surface rejection reasons (threshold, contradiction, window). Persisted to ~/.claude/adam/last-trace.txt. Dead_end nudge proposal type (#2): single-session auto-apply gate (>=3 dead_end events). Action appends to active-nudges.json, surfaced via adam-nudge.mjs at next SessionStart. Lower blast than skill_edit. Per-(skill, fingerprint) cooldown (#4): adam-cooldown.mjs replaces coarse per-skill check. proposal_fingerprint = djb2(skill_slug + cluster_id + normalized_diff_body). Legacy applied/rejected records gate via 'legacy' fingerprint fallback through resolveSkill helper (handles target_skill, skill, or target: <path>). task_completed scoring integration (#8): adam-score.mjs computes per-session urgency dampener (3 task_completed -> 0.5) and reinforcement candidates (skills cited in >=3 clean completions). New 'reinforcement' proposal type appends to reinforcements.jsonl on apply (no code/memory mutation). A/B effectiveness measurement (#5): every auto-applied edit appends to ab-tracking.jsonl. 
adam-ab-measure.mjs computes 7d pre/post signal-count delta per entry (improved / neutral / regressed / no_baseline / pending). Analyst surfaces regressions at top of /reflect output. Upgrade UX overhaul (#9): adam-upgrade.mjs implements --list/--diff/--accept/--accept-all. SessionStart nudge prints pending-merge warning when .adam-new files exist (latency ~20ms via fixed shortlist). install.sh emits unmissable final-message hint after creating any .adam-new file. Simplify pass: adam-utils.mjs deduplicates readJsonlSafe / listJsonlFiles / parseFrontmatter across 8 scripts. Net -46 LOC. Test coverage: 30 -> 87 tests. Every new feature has feature-validating assertions (false-case coverage included). T77 statically verifies install.sh references every adam-*.mjs source script (would have caught the missing adam-utils inclusion that review #2 surfaced).
This commit is contained in:
+123
-8
@@ -1,16 +1,131 @@
|
||||
#!/usr/bin/env node
|
||||
import { readdirSync } from "node:fs";
|
||||
// adam-nudge.mjs — SessionStart hook. Prints up to three kinds of reminders:
|
||||
// 1. Pending proposals (≥3 queued in adam/proposals/).
|
||||
// 2. Cross-session nudges (entries in adam/active-nudges.json whose
|
||||
// source_session differs from the current session and that haven't
|
||||
// expired or exhausted their max_displays).
// 3. Pending upgrades (`.adam-new` files found next to known installable paths).
|
||||
import { readdirSync, readFileSync, writeFileSync, existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { homedir } from "node:os";
|
||||
|
||||
const PROPOSALS = join(homedir(), ".claude", "adam", "proposals");
|
||||
const HOME = process.env.HOME || homedir();
|
||||
const CLAUDE_ROOT = join(HOME, ".claude");
|
||||
const ADAM_ROOT = join(CLAUDE_ROOT, "adam");
|
||||
const PROPOSALS = join(ADAM_ROOT, "proposals");
|
||||
const NUDGES_FILE = join(ADAM_ROOT, "active-nudges.json");
|
||||
const STATE_FILE = join(ADAM_ROOT, "state.json");
|
||||
const THRESHOLD = 3;
|
||||
|
||||
try {
|
||||
const PROPOSAL_RE = /^\d{4}-\d{2}-\d{2}-\d{3}-/;
|
||||
const files = readdirSync(PROPOSALS).filter(f => PROPOSAL_RE.test(f) && f.endsWith(".md"));
|
||||
if (files.length >= THRESHOLD) {
|
||||
process.stdout.write(`adam: ${files.length} proposals queued. Run /reflect to review.\n`);
|
||||
// Known installable paths (mirrors install.sh copy_file list). Checking a
|
||||
// fixed shortlist keeps SessionStart latency under control vs full FS walk.
|
||||
const PENDING_CHECK_PATHS = [
|
||||
"hooks/adam-observe.mjs",
|
||||
"hooks/adam-nudge.mjs",
|
||||
"agents/adam.md",
|
||||
"skills/adam-self-improvement/SKILL.md",
|
||||
"commands/reflect.md",
|
||||
"adam/scripts/adam-archive.mjs",
|
||||
"adam/scripts/adam-upgrade.mjs",
|
||||
"adam/scripts/adam-window.mjs",
|
||||
"adam/scripts/adam-explain.mjs",
|
||||
"adam/scripts/adam-nudge-eligibility.mjs",
|
||||
"adam/scripts/adam-cooldown.mjs",
|
||||
"adam/scripts/adam-score.mjs",
|
||||
"adam/scripts/adam-ab-measure.mjs",
|
||||
"adam/scripts/adam-apply-reinforcement.mjs",
|
||||
"adam/tests/run-tests.sh",
|
||||
];
|
||||
|
||||
// Load and parse a JSON file without ever throwing.
//   path     - file to read.
//   fallback - value handed back when the file is missing, unreadable, or
//              does not contain valid JSON.
// Returns the parsed JSON value, or `fallback`.
function readJson(path, fallback) {
  if (existsSync(path)) {
    try {
      const contents = readFileSync(path, "utf8");
      return JSON.parse(contents);
    } catch {
      // Unreadable or malformed file: treated the same as a missing one.
    }
  }
  return fallback;
}
|
||||
|
||||
// Best-effort extraction of `session_id` from the SessionStart payload on stdin.
//
// The read is synchronous: readFileSync(0) consumes file descriptor 0 until
// EOF, so this returns once the writer closes its end. Every failure mode
// (read error, empty input, invalid JSON, missing or non-string session_id)
// collapses to null — the caller treats the session id as optional.
//
// Returns the session id string, or null.
function readSessionInput() {
  let payload;
  try {
    const raw = readFileSync(0, "utf8");
    if (!raw) return null;
    payload = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!payload) return null;
  const id = payload.session_id;
  return typeof id === "string" ? id : null;
}
|
||||
|
||||
// Print a one-line reminder once enough proposals are waiting for review.
// A proposal is a `.md` file in PROPOSALS whose name starts with
// `YYYY-MM-DD-NNN-`. Nothing is printed below THRESHOLD, and any failure
// (typically a missing proposals directory) is silent.
function emitProposalReminder() {
  const namePattern = /^\d{4}-\d{2}-\d{2}-\d{3}-/;
  try {
    let queued = 0;
    for (const name of readdirSync(PROPOSALS)) {
      if (namePattern.test(name) && name.endsWith(".md")) queued += 1;
    }
    if (queued < THRESHOLD) return;
    process.stdout.write(`adam: ${queued} proposals queued. Run /reflect to review.\n`);
  } catch {
    // proposals dir absent → silent
  }
}
|
||||
|
||||
// Print cross-session nudges stored in NUDGES_FILE and prune the file.
//
// NUDGES_FILE must hold a JSON array; anything else makes this a no-op.
// For each entry:
//   - non-object entries, entries whose expires_at_ts is missing, zero,
//     non-numeric or not in the future, and entries whose displays_used has
//     already reached max_displays are dropped;
//   - entries recorded by the current session are kept but not printed;
//   - otherwise a non-empty string `message` is written to stdout and
//     displays_used is incremented (the entry is dropped once the cap is hit).
// The file is rewritten only when an entry was dropped or a counter changed.
//
// currentSession: id of the running session; main() passes "" when unknown.
function emitActiveNudges(currentSession) {
|
||||
if (!existsSync(NUDGES_FILE)) return;
|
||||
const raw = readJson(NUDGES_FILE, null);
|
||||
if (!Array.isArray(raw)) return;
|
||||
const now = Date.now();
|
||||
const kept = [];
|
||||
let mutated = false;
|
||||
for (const entry of raw) {
|
||||
if (!entry || typeof entry !== "object") { mutated = true; continue; }
|
||||
// expires_at_ts is compared against Date.now(), i.e. epoch milliseconds.
const expires = Number(entry.expires_at_ts || 0);
|
||||
if (!expires || expires <= now) { mutated = true; continue; }
|
||||
const sourceSession = entry.source_session || "";
|
||||
// A max_displays of 0 (or missing) disables the cap: every check below is
// guarded by `max > 0`.
const max = Number(entry.max_displays || 0);
|
||||
const used = Number(entry.displays_used || 0);
|
||||
if (max > 0 && used >= max) { mutated = true; continue; }
|
||||
// Cross-session gate: only print when current session differs.
// The gate applies only when both ids are non-empty; if either is unknown
// the entry falls through and is printed.
|
||||
if (sourceSession && currentSession && sourceSession === currentSession) {
|
||||
kept.push(entry);
|
||||
continue;
|
||||
}
|
||||
if (typeof entry.message === "string" && entry.message) {
|
||||
process.stdout.write(entry.message + "\n");
|
||||
const nextUsed = used + 1;
|
||||
mutated = true;
|
||||
if (max > 0 && nextUsed >= max) continue; // drop after exhaustion
|
||||
kept.push({ ...entry, displays_used: nextUsed });
|
||||
} else {
|
||||
// No printable message: keep the entry unchanged until it expires.
kept.push(entry);
|
||||
}
|
||||
}
|
||||
// NOTE(review): the `} catch {}` below has no matching `try` inside this
// function; it looks like a removed line from the previous version carried
// over by the diff rendering — confirm against the real file.
} catch {}
|
||||
if (mutated) {
|
||||
// Persisting is best-effort: a failed write is ignored.
try { writeFileSync(NUDGES_FILE, JSON.stringify(kept, null, 2)); } catch { /* swallow */ }
|
||||
}
|
||||
}
|
||||
|
||||
// Warn when upgrade candidates are waiting to be merged.
// For each entry of PENDING_CHECK_PATHS, checks whether `<entry>.adam-new`
// exists under CLAUDE_ROOT and prints a single summary line with the number
// found. Only the fixed shortlist is probed (no directory walk), and no
// failure is allowed to escape.
function emitPendingUpgrades() {
  try {
    const pending = PENDING_CHECK_PATHS.reduce((total, rel) => {
      const candidate = join(CLAUDE_ROOT, `${rel}.adam-new`);
      try {
        return existsSync(candidate) ? total + 1 : total;
      } catch {
        return total; // per-path swallow
      }
    }, 0);
    if (pending <= 0) return;
    const line = `[adam] ${pending} pending upgrade(s). Review: node ~/.claude/adam/scripts/adam-upgrade.mjs --list\n`;
    process.stdout.write(line);
  } catch {
    // never break SessionStart
  }
}
|
||||
|
||||
// Entry point: work out the current session id, then emit each reminder.
// The id from the stdin payload wins; otherwise the `session_id` string stored
// in STATE_FILE is used; otherwise the id is "" (unknown).
function main() {
  const fromStdin = readSessionInput();
  const state = readJson(STATE_FILE, null);
  const fromState = state && typeof state.session_id === "string" ? state.session_id : null;
  const currentSession = fromStdin || fromState || "";

  emitProposalReminder();
  emitActiveNudges(currentSession);
  emitPendingUpgrades();
}
|
||||
|
||||
try { main(); } catch { /* never block SessionStart */ }
|
||||
process.exit(0);
|
||||
|
||||
+174
-17
@@ -14,8 +14,76 @@ const JOURNAL = join(ROOT, "journal.jsonl");
|
||||
const STATE = join(ROOT, "state.json");
|
||||
const USAGE = join(ROOT, "usage.json");
|
||||
const JOURNAL_DIR = join(ROOT, "journal");
|
||||
// Safety fuse only — primary rotation is weekly (ISO Monday 00:00 UTC).
|
||||
// If active journal exceeds this even mid-week, force-rotate to avoid runaway growth.
|
||||
// Override via $ADAM_MAX_JOURNAL_BYTES (used by tests).
|
||||
const MAX_JOURNAL_BYTES = Number(process.env.ADAM_MAX_JOURNAL_BYTES) || 50 * 1024 * 1024;
|
||||
|
||||
const CORRECTION_RE = /\b(no|stop|don't|don\'t|wrong|actually|nope|undo|revert)\b/i;
|
||||
// Strong-correction tokens: any single occurrence in a prompt is a correction.
|
||||
// Weak tokens (no/actually/wait) require co-occurrence with a negation/contrast
|
||||
// token within an 8-token window — see isCorrection() below.
|
||||
const CORRECTION_RE = /\b(stop|don't|don\'t|wrong|nope|undo|revert|incorrect|nevermind|never\s+mind|disregard|redo)\b|that's\s+wrong|hold\s+on|wait\s+wait|try\s+again|different\s+approach|that's\s+not\s+what\s+i\s+meant|not\s+what\s+i\s+wanted|start\s+over|go\s+back/i;
|
||||
const WEAK_CORRECTION_TOKENS = new Set(["no", "actually", "wait"]);
|
||||
const NEGATION_RE = /^(not|wrong|but|isn't|isn\'t|didn't|didn\'t|aren't|aren\'t|won't|won\'t|shouldn't|shouldn\'t|don't|don\'t|nope|bad|broken|fail|fails|failed|failing)$/i;
|
||||
const WEAK_WINDOW = 8;
|
||||
|
||||
// Decide whether a user prompt reads as a correction.
//
// Two paths:
//   - Strong: any CORRECTION_RE match anywhere in the text.
//   - Weak: a WEAK_CORRECTION_TOKENS word counts only when a NEGATION_RE word
//     sits within WEAK_WINDOW tokens on either side of it.
//
// Typographic apostrophes inside a word (don’t, isn’t, that’s) are folded to
// ASCII `'` first. Both regexes spell their contractions with the ASCII
// apostrophe, so text typed with smart punctuation would otherwise match
// neither path. Quote marks at a word edge are left alone so that a quoted
// word (‘no’) still strips down to the bare token.
//
//   text - raw prompt text.
// Returns true when the prompt looks like a correction; false otherwise,
// including for empty or non-string input.
function isCorrection(text) {
  if (!text || typeof text !== "string") return false;

  const folded = text.replace(/(\w)[\u2018\u2019\u02BC](?=\w)/g, "$1'");
  if (CORRECTION_RE.test(folded)) return true;

  // Weak-token path: lowercase, split on whitespace, trim edge punctuation.
  const tokens = folded
    .toLowerCase()
    .split(/\s+/)
    .map((t) => t.replace(/^[^\w']+|[^\w']+$/g, ""))
    .filter(Boolean);

  for (let i = 0; i < tokens.length; i++) {
    if (!WEAK_CORRECTION_TOKENS.has(tokens[i])) continue;
    const lo = Math.max(0, i - WEAK_WINDOW);
    const hi = Math.min(tokens.length - 1, i + WEAK_WINDOW);
    for (let j = lo; j <= hi; j++) {
      if (j !== i && NEGATION_RE.test(tokens[j])) return true;
    }
  }
  return false;
}
|
||||
|
||||
// Canonical error codes. Surface text → code mapping below.
|
||||
const ERROR_CODES = new Set([
|
||||
"ENOENT", "ECONNREFUSED", "ETIMEDOUT", "EACCES", "EPERM", "EADDRINUSE",
|
||||
"ENOTFOUND", "EISDIR", "ENOTDIR", "EEXIST", "EMFILE", "EPIPE", "ECONNRESET"
|
||||
]);
|
||||
const ERROR_CODE_RE = /\b(ENOENT|ECONNREFUSED|ETIMEDOUT|EACCES|EPERM|EADDRINUSE|ENOTFOUND|EISDIR|ENOTDIR|EEXIST|EMFILE|EPIPE|ECONNRESET)\b/;
|
||||
// Phrase → code mapping. First match wins; order matters.
|
||||
const ERROR_PHRASE_MAP = [
|
||||
[/no such file or directory/i, "ENOENT"],
|
||||
[/connection refused/i, "ECONNREFUSED"],
|
||||
[/permission denied/i, "EACCES"],
|
||||
[/address already in use/i, "EADDRINUSE"],
|
||||
[/connection reset/i, "ECONNRESET"],
|
||||
[/operation timed out/i, "ETIMEDOUT"],
|
||||
[/name resolution|getaddrinfo/i, "ENOTFOUND"],
|
||||
];
|
||||
|
||||
// Reduce an error message to a stable form for fingerprinting by blanking
// out the parts that change between occurrences of the same failure.
//
// Rule order matters. Composite patterns go first so that the generic numeric
// rules cannot tear them apart: ISO timestamps, then paths, then UUIDs. UUIDs
// in particular must be removed before the epoch / line-col / large-integer
// rules — an all-digit final UUID group (12 digits) would otherwise be eaten
// by the epoch rule, the UUID rule would then no longer match, and the
// remaining hex groups would leak into the fingerprint.
//
//   text - raw error text.
// Returns the lowercased, whitespace-collapsed residue, at most 80 characters;
// "" for empty or non-string input.
function normalizeErrorText(text) {
  if (!text || typeof text !== "string") return "";
  let s = text;
  // ISO timestamps first (contain digits we'd otherwise strip individually).
  s = s.replace(/\d{4}-\d{2}-\d{2}T[\d:.Z+-]+/g, " ");
  // Windows paths.
  s = s.replace(/[A-Z]:\\[^\s]+/g, " ");
  // Absolute POSIX paths.
  s = s.replace(/\/[^\s:]+/g, " ");
  // UUIDs (before any rule that matches bare digit runs).
  s = s.replace(/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, " ");
  // Hex addresses.
  s = s.replace(/0x[0-9a-f]+/gi, " ");
  // Unix epoch (seconds or ms): 10-13 digit runs.
  s = s.replace(/\b\d{10,13}\b/g, " ");
  // Line/col refs.
  s = s.replace(/:\d+(?::\d+)?/g, " ");
  // Large integers (>6 digits) that survived above.
  s = s.replace(/\b\d{7,}\b/g, " ");
  // Lowercase + collapse whitespace.
  s = s.toLowerCase().replace(/\s+/g, " ").trim();
  return s.slice(0, 80);
}
|
||||
// Heuristic "does this text look like an error" matcher.
// `eacces` is the canonical spelling of the code (it is the one listed in
// ERROR_CODES); the earlier `eaccess` spelling is kept so text that matched
// before still matches.
const ERROR_RE = /\b(error|failed|exception|traceback|denied|cannot|unable to|not found|undefined|nullpointer|typeerror|syntaxerror|panic|fatal|enoent|econnrefused|etimedout|eacces|eaccess|segfault|crashed|uncaught)\b/i;
|
||||
const BUILD_RE = /\b(build|compile|make|gradle|cargo|tsc|webpack|vite|rollup|pytest|jest|mocha|vitest|go\s+test|npm\s+test|yarn\s+test|npm\s+run\s+build|yarn\s+build|ctest|ninja|bazel)\b/i;
|
||||
const EDIT_TOOLS = new Set(["Edit", "Write", "MultiEdit", "NotebookEdit"]);
|
||||
@@ -44,14 +112,69 @@ function safeWrite(path, obj) {
|
||||
try { writeFileSync(path, JSON.stringify(obj)); } catch {}
|
||||
}
|
||||
|
||||
function rotateIfLarge(path, max) {
|
||||
// ISO-8601 week: returns { year, week } for a Date (UTC).
|
||||
// Week 1 = the week containing the first Thursday of the year (Monday-based weeks).
|
||||
// ISO-8601 week number and week-numbering year of a Date, evaluated in UTC.
// Weeks start on Monday; week 1 is the week containing the year's first
// Thursday, so the Thursday of a date's week decides which year it belongs to.
//   date - the Date to classify.
// Returns { year, week }.
function isoWeek(date) {
  const MS_PER_DAY = 86400000;
  // Start from UTC midnight of the given day so time-of-day cannot shift the result.
  const thursday = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
  // getUTCDay() reports Sunday as 0; ISO numbers the days Mon=1..Sun=7.
  const weekday = thursday.getUTCDay() || 7;
  thursday.setUTCDate(thursday.getUTCDate() + (4 - weekday));
  const year = thursday.getUTCFullYear();
  const jan1 = new Date(Date.UTC(year, 0, 1));
  const dayOfYear = (thursday - jan1) / MS_PER_DAY + 1;
  return { year, week: Math.ceil(dayOfYear / 7) };
}
|
||||
|
||||
// Format a Date's ISO week as `YYYY-Www`, with the week zero-padded to two
// digits (e.g. 2026-W05).
function isoWeekTag(date) {
  const parts = isoWeek(date);
  const paddedWeek = String(parts.week).padStart(2, "0");
  return `${parts.year}-W${paddedWeek}`;
}
|
||||
|
||||
// Return the `ts` field of the first JSONL entry in the file at `path`.
// Returns null when the file cannot be read, the first line is blank or not
// valid JSON, or `ts` is missing or not a string.
// NOTE(review): the whole file is read just to inspect its first line. If the
// journal can grow large, reading only a leading chunk would be cheaper, but
// that needs extra fs imports — confirm before changing.
function firstEntryTs(path) {
|
||||
try {
|
||||
// NOTE(review): the next five statements reference `max` and perform a rename;
// they look like removed lines of the previous rotateIfLarge body carried over
// by the diff rendering, not part of this function — confirm against the file.
if (existsSync(path) && statSync(path).size > max) {
|
||||
mkdirSync(JOURNAL_DIR, { recursive: true });
|
||||
const today = new Date().toISOString().slice(0, 10);
|
||||
const dest = join(JOURNAL_DIR, `${today}-${Date.now()}.jsonl`);
|
||||
renameSync(path, dest);
|
||||
const buf = readFileSync(path, "utf8");
|
||||
// A file with no newline is treated as a single line.
const nl = buf.indexOf("\n");
|
||||
const firstLine = nl === -1 ? buf : buf.slice(0, nl);
|
||||
if (!firstLine.trim()) return null;
|
||||
const obj = JSON.parse(firstLine);
|
||||
return obj && typeof obj.ts === "string" ? obj.ts : null;
|
||||
} catch { return null; }
|
||||
}
|
||||
|
||||
// Weekly ISO rotation + size safety fuse.
|
||||
// - If active journal's first entry is in a different ISO week than now, rotate to
|
||||
// journal/<that-entry's-iso-week>.jsonl and start fresh.
|
||||
// - If active journal exceeds MAX_JOURNAL_BYTES, force-rotate even mid-week
|
||||
// using the current ISO week tag (suffixed with timestamp to avoid clobber).
|
||||
// Rotate the active journal into JOURNAL_DIR when it is due. Best-effort:
// any failure leaves the journal in place and is swallowed.
//
//   - Weekly: when the first entry's timestamp falls in a different ISO week
//     than now, the file moves to `<that week's tag>.jsonl`.
//   - Safety fuse: otherwise, when the file exceeds MAX_JOURNAL_BYTES, it moves
//     to `<current week tag>-<epoch ms>.jsonl`.
//
// A first-entry timestamp that does not parse as a date is ignored for the
// weekly check; without that guard it would produce the tag `NaN-WNaN`, and
// the journal would be rotated into a file of that name.
//
//   path - path of the active journal file.
function rotateIfNeeded(path) {
  try {
    if (!existsSync(path)) return;
    const size = statSync(path).size;
    if (size === 0) return;

    const currentTag = isoWeekTag(new Date());
    let destTag = null;

    const firstTs = firstEntryTs(path);
    if (firstTs) {
      const firstDate = new Date(firstTs);
      if (!Number.isNaN(firstDate.getTime())) {
        const firstTag = isoWeekTag(firstDate);
        if (firstTag !== currentTag) destTag = firstTag;
      }
    }
    if (destTag === null && size > MAX_JOURNAL_BYTES) {
      destTag = `${currentTag}-${Date.now()}`; // safety-fuse: keep mid-week rotations unique
    }
    if (destTag === null) return;

    mkdirSync(JOURNAL_DIR, { recursive: true });
    // renameSync replaces an existing destination, so never reuse a taken name:
    // on collision the file goes to a timestamp-suffixed sibling (nothing is
    // merged), with a counter appended if that name is taken as well.
    let dest = join(JOURNAL_DIR, `${destTag}.jsonl`);
    for (let n = 0; existsSync(dest); n++) {
      const counter = n === 0 ? "" : `-${n}`;
      dest = join(JOURNAL_DIR, `${destTag}-${Date.now()}${counter}.jsonl`);
    }
    renameSync(path, dest);
  } catch {}
}
|
||||
|
||||
@@ -65,7 +188,7 @@ function readStdin() {
|
||||
}
|
||||
|
||||
function appendJournal(entry) {
|
||||
rotateIfLarge(JOURNAL, STATE_MAX_BYTES * 5);
|
||||
rotateIfNeeded(JOURNAL);
|
||||
try {
|
||||
appendFileSync(JOURNAL, JSON.stringify(entry) + "\n");
|
||||
} catch {}
|
||||
@@ -107,15 +230,34 @@ function errorFingerprint(toolResponse) {
|
||||
}
|
||||
if (!text) return null;
|
||||
text = text.slice(0, 4000);
|
||||
// ERROR_RE fallback covers tools that omit `is_error` entirely (text-only
|
||||
// responses, third-party tools). Explicit `is_error: false` is honored as-is
|
||||
// — the regex is NOT used to second-guess a tool that already declared success.
|
||||
const isError = toolResponse.is_error === true ||
|
||||
(toolResponse.is_error === undefined && ERROR_RE.test(text));
|
||||
if (!isError) return null;
|
||||
const m = text.match(ERROR_RE);
|
||||
const idx = m && typeof m.index === "number" ? m.index : 0;
|
||||
const start = Math.max(0, idx - 20);
|
||||
const slice = text.slice(start, start + 80).toLowerCase().replace(/\s+/g, " ").trim();
|
||||
if (!slice) return null;
|
||||
return djb2(slice);
|
||||
|
||||
// 1. Try canonical code (literal token first, then phrase mapping).
|
||||
let code = null;
|
||||
const codeMatch = text.match(ERROR_CODE_RE);
|
||||
if (codeMatch && ERROR_CODES.has(codeMatch[1])) {
|
||||
code = codeMatch[1];
|
||||
} else {
|
||||
for (const [re, mapped] of ERROR_PHRASE_MAP) {
|
||||
if (re.test(text)) { code = mapped; break; }
|
||||
}
|
||||
}
|
||||
|
||||
// 2. When canonical code matched, the bucket key IS the code — residual
|
||||
// surface text (ports, hostnames, syscall names) varies across instances
|
||||
// of the same root cause, so we hash a fixed sentinel for stability.
|
||||
// When no code matched, normalize residual and hash it for the raw bucket.
|
||||
if (code) {
|
||||
return `${code}:${djb2(code)}`;
|
||||
}
|
||||
const normalized = normalizeErrorText(text);
|
||||
if (!normalized) return null;
|
||||
return `raw:${djb2(normalized)}`;
|
||||
}
|
||||
|
||||
function resetFrictionCounters(state) {
|
||||
@@ -163,6 +305,10 @@ function main() {
|
||||
const input = readStdin();
|
||||
if (!input || typeof input !== "object") return;
|
||||
|
||||
// Weekly rotation check at hook entry — ensures the active journal rolls over
|
||||
// even if this invocation appends nothing.
|
||||
rotateIfNeeded(JOURNAL);
|
||||
|
||||
const event = input.hook_event_name;
|
||||
const session = input.session_id || "unknown";
|
||||
const cwd = input.cwd || process.cwd();
|
||||
@@ -177,7 +323,7 @@ function main() {
|
||||
|
||||
if (event === "UserPromptSubmit") {
|
||||
const prompt = (input.prompt || "").slice(0, 200);
|
||||
if (CORRECTION_RE.test(prompt)) {
|
||||
if (isCorrection(prompt)) {
|
||||
const last = state.tool_window[state.tool_window.length - 1] || {};
|
||||
appendJournal({
|
||||
ts, session, cwd, type: "correction",
|
||||
@@ -348,5 +494,16 @@ function main() {
|
||||
safeWrite(STATE, state);
|
||||
}
|
||||
|
||||
try { main(); } catch {}
|
||||
process.exit(0);
|
||||
// Run main only when executed as a script, not when imported for tests.
|
||||
// The check compares import.meta.url with a file:// URL built from argv[1].
// NOTE(review): import.meta.url is a percent-encoded URL, while the right-hand
// side interpolates the raw path. A script path containing spaces or other
// characters that get encoded (and, presumably, Windows or symlinked paths)
// would compare unequal, so main() would silently not run. The Node.js docs
// build this URL with url.pathToFileURL(process.argv[1]).href — confirm and
// switch once the node:url import can be added.
|
||||
const isMain = (() => {
|
||||
try {
|
||||
return import.meta.url === `file://${process.argv[1]}`;
|
||||
// If the comparison itself throws, fall back to running as a script.
} catch { return true; }
|
||||
})();
|
||||
if (isMain) {
|
||||
// Errors from main() are swallowed and the process always exits with 0.
try { main(); } catch {}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
export { errorFingerprint, normalizeErrorText, isCorrection };
|
||||
|
||||
Reference in New Issue
Block a user