mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-09 23:19:12 +00:00
012c40b9ab
Storage/window/exclusion split (#7): ISO-week journal rotation with safety fuse replaces size-based rotation (fixes silent under-counting when clusters straddle boundaries). Per-signal sliding windows via adam-window.mjs guard against stale signal accumulation. Legacy YYYY-MM-DD-<ts>.jsonl files remain readable. Error fingerprint normalization (#3): adam-observe.mjs extracts canonical error codes (ENOENT, ECONNREFUSED, etc.) and normalizes paths/timestamps/hex before hashing. 'Connection refused' and 'ECONNREFUSED' now cluster identically. Correction corpus expansion (#1): strong tokens (stop, wrong, undo, try again, different approach, etc.) fire on any occurrence. Weak tokens (no, actually, wait) require negation/contrast co-occurrence within 8 tokens. Kills the 'actually, I think...' false positive. Analyst observability (#6): mandatory clustering trace block; adam-explain.mjs parses to summary/full/json. Cluster decisions now surface rejection reasons (threshold, contradiction, window). Persisted to ~/.claude/adam/last-trace.txt. Dead_end nudge proposal type (#2): single-session auto-apply gate (>=3 dead_end events). Action appends to active-nudges.json, surfaced via adam-nudge.mjs at next SessionStart. Lower blast than skill_edit. Per-(skill, fingerprint) cooldown (#4): adam-cooldown.mjs replaces coarse per-skill check. proposal_fingerprint = djb2(skill_slug + cluster_id + normalized_diff_body). Legacy applied/rejected records gate via 'legacy' fingerprint fallback through resolveSkill helper (handles target_skill, skill, or target: <path>). task_completed scoring integration (#8): adam-score.mjs computes per-session urgency dampener (3 task_completed -> 0.5) and reinforcement candidates (skills cited in >=3 clean completions). New 'reinforcement' proposal type appends to reinforcements.jsonl on apply (no code/memory mutation). A/B effectiveness measurement (#5): every auto-applied edit appends to ab-tracking.jsonl. 
adam-ab-measure.mjs computes 7d pre/post signal-count delta per entry (improved / neutral / regressed / no_baseline / pending). Analyst surfaces regressions at top of /reflect output. Upgrade UX overhaul (#9): adam-upgrade.mjs implements --list/--diff/--accept/--accept-all. SessionStart nudge prints pending-merge warning when .adam-new files exist (latency ~20ms via fixed shortlist). install.sh emits unmissable final-message hint after creating any .adam-new file. Simplify pass: adam-utils.mjs deduplicates readJsonlSafe / listJsonlFiles / parseFrontmatter across 8 scripts. Net -46 LOC. Test coverage: 30 -> 87 tests. Every new feature has feature-validating assertions (false-case coverage included). T77 statically verifies install.sh references every adam-*.mjs source script (would have caught the missing adam-utils inclusion that review #2 surfaced).
191 lines
7.1 KiB
JavaScript
Executable File
191 lines
7.1 KiB
JavaScript
Executable File
#!/usr/bin/env node
// adam-ab-measure.mjs — A/B effectiveness measurement on auto-applied edits.
//
// Reads ~/.claude/adam/ab-tracking.jsonl (one line per auto-apply event,
// written by adam-self-improvement/SKILL.md), then for each entry old enough
// (>= --min-age-days; default 7) compares signal counts in the 7-day window
// BEFORE applied_at against the 7-day window AFTER applied_at across the
// full journal corpus (active + rotated). Surfaces regressions so /reflect
// can flag proposals that made things worse.
//
// CLI:
//   adam-ab-measure.mjs [--home <path>] [--format json|table] [--min-age-days N]
//
// Output (default `table`): aligned columns sorted regressed-first.
// Output (`json`): array of deltas.
// Empty / missing tracking file → empty output, exit 0.
// Exit 1 only on I/O failure.
|
|
|
|
import { realpathSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { pathToFileURL } from "node:url";

import { readJsonlSafe, listJsonlFiles } from "./adam-utils.mjs";
|
|
|
|
/** Milliseconds in one day; converts day-based spans into timestamp offsets. */
const DAY_MS = 24 * 60 * 60 * 1000;

/** Window span (days) used when a tracking entry has no numeric `pre_window_days`. */
export const DEFAULT_PRE_WINDOW_DAYS = 7;

/** Default minimum age (days) before an entry is measured instead of reported as pending. */
export const DEFAULT_MIN_AGE_DAYS = 7;

// Status thresholds on the percentage change of post-count relative to
// pre-count: at or above REGRESSED_PCT is "regressed", at or below
// IMPROVED_PCT is "improved", anything in between is "neutral".
const REGRESSED_PCT = 25;
const IMPROVED_PCT = -25;
|
|
|
|
/**
 * Parse the CLI arguments (everything after the script path).
 *
 * Recognised flags: `--home <path>`, `--format <name>`, `--min-age-days <N>`,
 * `--help` / `-h`. Unknown flags, and value-taking flags that appear last with
 * no value, are ignored silently.
 *
 * @param {string[]} argv - Argument list, e.g. `process.argv.slice(2)`.
 * @returns {{home: (string|null), format: string, minAgeDays: number, help: boolean}}
 */
function parseArgs(argv) {
  const args = { home: null, format: "table", minAgeDays: DEFAULT_MIN_AGE_DAYS, help: false };
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    const hasValue = i + 1 < argv.length;
    if (flag === "--home" && hasValue) {
      args.home = argv[++i];
    } else if (flag === "--format" && hasValue) {
      args.format = argv[++i];
    } else if (flag === "--min-age-days" && hasValue) {
      const raw = String(argv[++i]);
      const n = Number(raw);
      // Number("") and Number("  ") are 0, so a blank value must be rejected
      // explicitly; non-finite values (NaN, Infinity) keep the default too.
      if (raw.trim() !== "" && Number.isFinite(n) && n >= 0) args.minAgeDays = n;
    } else if (flag === "--help" || flag === "-h") {
      args.help = true;
    }
  }
  return args;
}
|
|
|
|
/**
 * Collect every journal entry under `<claudeHome>/adam`: the active
 * `journal.jsonl` first, followed by each file that `listJsonlFiles` reports
 * for the `journal/` directory, in the order it returns them.
 *
 * @param {string} claudeHome - Root directory that contains the `adam` folder.
 * @returns {Array<*>} All entries yielded by `readJsonlSafe`, concatenated.
 */
function loadJournalAll(claudeHome) {
  const adamRoot = join(claudeHome, "adam");
  const sources = [join(adamRoot, "journal.jsonl"), ...listJsonlFiles(join(adamRoot, "journal"))];
  const all = [];
  for (const source of sources) {
    for (const entry of readJsonlSafe(source)) all.push(entry);
  }
  return all;
}
|
|
|
|
/**
 * Timestamp of a journal entry in unix milliseconds.
 *
 * @param {*} e - Journal entry; only a string `ts` property is considered.
 * @returns {number} `Date.parse(e.ts)`, or NaN when `e` is falsy or `e.ts` is
 *   not a string (an unparseable string also yields NaN via `Date.parse`).
 */
function tsMs(e) {
  return typeof e?.ts === "string" ? Date.parse(e.ts) : NaN;
}
|
|
|
|
/**
 * Compute the pre/post signal-count delta for each ab-tracking entry.
 *
 * Performs no I/O; the only ambient input is `Date.now()`, read when
 * `opts.now` is absent or not a finite number.
 *
 * For every usable entry the journal events whose `type` is one of the
 * entry's originating signal types are counted in `[applied_at - window,
 * applied_at)` (pre) and `[applied_at, applied_at + window)` (post).
 *
 * Statuses:
 *   - "pending"     — entry is younger than `minAgeDays`; counts are null.
 *   - "no_baseline" — no matching events in the pre window; delta is null.
 *   - "improved" / "regressed" / "neutral" — by percentage change against
 *     the IMPROVED_PCT / REGRESSED_PCT thresholds.
 *
 * Entries that are not objects, or whose `applied_at` is missing, zero,
 * non-numeric, or outside the range a Date can represent, are skipped.
 *
 * @param {Array<object>} entries - ab-tracking records.
 * @param {Array<object>} journal - Journal entries from any source.
 * @param {{now?: number, minAgeDays?: number}} [opts] - `now` is unix ms;
 *   `minAgeDays` is the age floor below which an entry is "pending".
 * @returns {Array<object>} One delta record per usable entry, in input order.
 */
export function computeDeltas(entries, journal, opts = {}) {
  const { now: nowOpt, minAgeDays: minAgeOpt } = opts ?? {};
  const now = Number.isFinite(nowOpt) ? nowOpt : Date.now();
  const minAgeDays =
    typeof minAgeOpt === "number" && !Number.isNaN(minAgeOpt) ? minAgeOpt : DEFAULT_MIN_AGE_DAYS;

  // Parse journal timestamps once up front instead of once per tracking
  // entry; entries without a usable timestamp can never be counted.
  const timeline = [];
  for (const je of journal || []) {
    if (!je || typeof je !== "object") continue;
    const t = tsMs(je);
    if (!Number.isNaN(t)) timeline.push({ type: je.type, t });
  }

  const out = [];
  for (const e of entries || []) {
    if (!e || typeof e !== "object") continue;
    const appliedAt = Number(e.applied_at);
    if (!appliedAt || Number.isNaN(appliedAt)) continue;
    // Values such as Infinity or 1e300 pass the check above but produce an
    // invalid Date, whose toISOString() throws RangeError. Skip them so one
    // corrupt record cannot abort the whole measurement.
    const appliedDate = new Date(appliedAt);
    if (Number.isNaN(appliedDate.getTime())) continue;

    const ageDays = (now - appliedAt) / DAY_MS;
    // Symmetric window: same span applied to pre AND post sides. JSONL schema
    // field stays `pre_window_days` for backward compat with existing
    // ab-tracking.jsonl entries — local name reflects symmetry.
    const windowDays = typeof e.pre_window_days === "number" ? e.pre_window_days : DEFAULT_PRE_WINDOW_DAYS;
    const signals = Array.isArray(e.originating_signals)
      ? e.originating_signals.map((s) => (s && typeof s === "object" ? s.type : null)).filter(Boolean)
      : [];
    const sigSet = new Set(signals);

    const base = {
      proposal_id: e.proposal_id || "",
      proposal_type: e.proposal_type || "",
      target_skill: e.target_skill || "",
      applied_at: appliedAt,
      applied_at_iso: appliedDate.toISOString(),
      signal_types: [...sigSet],
    };

    if (ageDays < minAgeDays) {
      out.push({ ...base, pre_count: null, post_count: null, delta_pct: null, status: "pending" });
      continue;
    }

    // NOTE(review): when minAgeDays is smaller than windowDays the post
    // window may not have fully elapsed yet, so post_count can undercount
    // relative to the full pre window.
    const preStart = appliedAt - windowDays * DAY_MS;
    const postEnd = appliedAt + windowDays * DAY_MS;
    let preCount = 0;
    let postCount = 0;
    for (const { type, t } of timeline) {
      if (!sigSet.has(type)) continue;
      if (t >= preStart && t < appliedAt) preCount++;
      else if (t >= appliedAt && t < postEnd) postCount++;
    }

    let status;
    let deltaPct;
    if (preCount === 0) {
      // A percentage change against zero is undefined.
      status = "no_baseline";
      deltaPct = null;
    } else {
      deltaPct = ((postCount - preCount) / preCount) * 100;
      // Round to 2 dp for stable comparison + presentation.
      deltaPct = Math.round(deltaPct * 100) / 100;
      if (deltaPct <= IMPROVED_PCT) status = "improved";
      else if (deltaPct >= REGRESSED_PCT) status = "regressed";
      else status = "neutral";
    }
    out.push({ ...base, pre_count: preCount, post_count: postCount, delta_pct: deltaPct, status });
  }
  return out;
}
|
|
|
|
// Display priority per status: lower ranks are listed first, so regressions
// lead the table. A Map (rather than a plain object) keeps inherited keys
// such as "constructor" from resolving to a non-numeric rank.
const STATUS_ORDER = new Map([
  ["regressed", 0],
  ["neutral", 1],
  ["no_baseline", 2],
  ["improved", 3],
  ["pending", 4],
]);

/**
 * Return a sorted copy of `deltas`: by status priority first, then by
 * ascending `applied_at`. Unknown statuses sort last (rank 99). The input
 * array is not modified.
 *
 * @param {Array<object>} deltas - Delta records with `status` and `applied_at`.
 * @returns {Array<object>} New array in display order.
 */
function sortForTable(deltas) {
  const rank = (d) => STATUS_ORDER.get(d.status) ?? 99;
  return [...deltas].sort((a, b) => rank(a) - rank(b) || a.applied_at - b.applied_at);
}
|
|
|
|
/**
 * Right-pad the string form of `s` with spaces to at least `n` characters.
 * Values already `n` characters or longer are returned unchanged (never truncated).
 *
 * @param {*} s - Value to render; coerced with `String()`.
 * @param {number} n - Minimum width.
 * @returns {string}
 */
function padRight(s, n) {
  return String(s).padEnd(n);
}
|
|
|
|
/**
 * Render delta records as an aligned plain-text table.
 *
 * Rows are ordered by `sortForTable`. Empty or missing fields render as "-".
 * The header row is followed by a dashed separator, and each column is as
 * wide as its longest cell.
 *
 * @param {Array<object>} deltas - Records as produced by `computeDeltas`.
 * @returns {string} Table text without a trailing newline, or "" when
 *   `deltas` is empty or missing.
 */
export function formatTable(deltas) {
  if (!deltas || !deltas.length) return "";
  const headers = ["proposal_id", "target", "type", "applied_at(iso)", "pre/post", "delta%", "status"];
  const rows = sortForTable(deltas).map((d) => [
    d.proposal_id || "-",
    d.target_skill || "-",
    d.proposal_type || "-",
    d.applied_at_iso || "-",
    d.pre_count == null ? "-" : `${d.pre_count}/${d.post_count}`,
    // toFixed exists only on numbers; fall back to the raw text for any
    // other non-null value instead of throwing.
    d.delta_pct == null ? "-" : typeof d.delta_pct === "number" ? d.delta_pct.toFixed(2) : String(d.delta_pct),
    d.status || "-",
  ]);

  // Accumulate widths in a loop: spreading every row into Math.max would be
  // bounded by the engine's argument-count limit.
  const widths = headers.map((h) => h.length);
  for (const row of rows) {
    row.forEach((cell, i) => {
      widths[i] = Math.max(widths[i], String(cell).length);
    });
  }

  const renderRow = (cells) => cells.map((c, i) => padRight(c, widths[i])).join(" | ");
  const lines = [renderRow(headers), widths.map((w) => "-".repeat(w)).join("-+-")];
  for (const row of rows) lines.push(renderRow(row));
  return lines.join("\n");
}
|
|
|
|
/**
 * Serialise delta records as compact JSON.
 *
 * @param {Array<object>} deltas - Records as produced by `computeDeltas`.
 * @returns {string} JSON text; a falsy `deltas` serialises as `[]`.
 */
export function formatJson(deltas) {
  return JSON.stringify(deltas || []);
}
|
|
|
|
/**
 * CLI entry point: read the tracking file, compute deltas against the full
 * journal, and print them in the requested format.
 *
 * Exit status is 0 on success (including an empty or missing tracking file)
 * and 1 when anything in the read/compute/format path throws. The status is
 * set through `process.exitCode` rather than `process.exit()` so that pending
 * stdout/stderr writes are flushed before the process ends.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    process.stdout.write("usage: adam-ab-measure.mjs [--home <path>] [--format json|table] [--min-age-days N]\n");
    return;
  }
  const claudeHome = args.home || join(homedir(), ".claude");
  const trackingPath = join(claudeHome, "adam", "ab-tracking.jsonl");
  // Any format other than "json" is rendered as a table.
  const wantsJson = args.format === "json";
  try {
    const entries = readJsonlSafe(trackingPath);
    if (!entries.length) {
      // json mode still emits a valid document; table mode prints nothing.
      if (wantsJson) process.stdout.write("[]\n");
      return;
    }
    const journal = loadJournalAll(claudeHome);
    const deltas = computeDeltas(entries, journal, { minAgeDays: args.minAgeDays });
    const out = wantsJson ? formatJson(deltas) : formatTable(deltas);
    if (out) process.stdout.write(out + "\n");
  } catch (e) {
    process.stderr.write(`adam-ab-measure error: ${e?.message ?? e}\n`);
    process.exitCode = 1;
  }
}
|
|
|
|
/**
 * Report whether this module is the script Node was launched with.
 *
 * `import.meta.url` is a percent-encoded file URL, so it cannot be compared
 * against a hand-built `file://${path}` string: that breaks for paths with
 * spaces or other escaped characters and for Windows drive paths. The entry
 * path is converted with `pathToFileURL` instead, and its resolved real path
 * is tried as well so that launching through a symlink still matches.
 *
 * @returns {boolean}
 */
function isDirectRun() {
  const entry = process.argv[1];
  if (!entry) return false;
  if (import.meta.url === pathToFileURL(entry).href) return true;
  try {
    return import.meta.url === pathToFileURL(realpathSync(entry)).href;
  } catch {
    // Entry path could not be resolved on disk; treat as "not run directly".
    return false;
  }
}

if (isDirectRun()) {
  main();
}
|