#!/usr/bin/env node
// adam-skill-utility.mjs — execution-grounded per-skill utility report.
//
// Inspired by SkillsInjector (arXiv 2605.29794v1), which shows skill injection
// should be driven by execution-grounded *utility* Δ(t,s), not surface keyword
// match — and that some topically-relevant skills actively *lower* success.
// The paper learns Δ(t,s) from rollout outcomes. We don't train anything: the
// adam journal already attaches `active_skills` to both positive outcome events
// (task_completed, clean_recovery, correction_free_streak) and negative ones
// (dead_end, tool_error_loop, …). So we approximate Δ(s) as a co-occurrence
// ratio over the data we already collect.
//
// CAVEAT (honest): this is CO-OCCURRENCE, not causation. A skill active during
// a dead_end did not necessarily cause it. Read the report as "which skills
// correlate with friction", a prompt for review — never as proof.
//
// Metric, per skill active during scored events:
//   pos / neg — count of positive / negative outcome events it co-occurred with
//   share     — pos / (pos+neg)
//   lift      — share − global_baseline (>0 above baseline, <0 below)
//   wLB       — Wilson 95% lower bound of the positive proportion; ranks
//               *reliably* below-baseline skills to the top (sample-aware)
//   sevNeg    — severity-weighted negative sum (adam SEVERITY_DIVISORS)
//   topNeg    — dominant negative event type
// Rows sorted worst-first (lowest wLB) so harmful/over-eager skills surface.
//
// CLI:
//   adam-skill-utility.mjs [--home DIR] [--input FILE]
//                          [--min N] [--days N] [--json]
//   --min   min event count (n) to treat a skill's signal as confident (default 8)
//   --days  only consider events within the last N days (default: all)
//   --json  emit machine-readable JSON instead of the text table
//
// Reuses adam-utils (jsonl IO) and adam-score (canonical NEGATIVE set +
// severity), so the positive/negative taxonomy stays single-sourced.
import { readFileSync, realpathSync } from "node:fs";
import { join } from "node:path";
import { homedir } from "node:os";
import { pathToFileURL } from "node:url";
import { readJsonlSafe, listJsonlFiles } from "./adam-utils.mjs";
import { NEGATIVE_SIGNAL_TYPES, entrySeverity } from "./adam-score.mjs";

// Positive outcome signals (mirror adam's vocabulary; task_completed is adam's
// canonical "clean task", the same one adam-score uses for reinforcement).
export const POSITIVE_SIGNAL_TYPES = new Set([
  "task_completed",
  "clean_recovery",
  "correction_free_streak",
]);

// Default event count below which a skill's row is flagged as low-sample.
export const DEFAULT_MIN_SAMPLE = 8;

const MS_PER_DAY = 86400000;

/**
 * Round to three decimal places.
 * @param {number} x
 * @returns {number}
 */
function round(x) {
  return Math.round(x * 1000) / 1000;
}

/**
 * Wilson score interval lower bound for a binomial proportion. Sample-aware:
 * a skill with 1 pos / 0 neg does NOT outrank one with 40 pos / 2 neg.
 *
 * @param {number} pos - number of positive observations
 * @param {number} n - total number of observations
 * @param {number} [z=1.96] - normal quantile (1.96 ≈ 95% two-sided)
 * @returns {number} lower bound, never below 0; 0 when n <= 0
 */
export function wilsonLower(pos, n, z = 1.96) {
  if (n <= 0) return 0;
  const p = pos / n;
  const z2 = z * z;
  const denom = 1 + z2 / n;
  const center = p + z2 / (2 * n);
  const margin = z * Math.sqrt((p * (1 - p) + z2 / (4 * n)) / n);
  // With pos === 0 the two terms cancel analytically; clamp so floating-point
  // noise cannot produce a tiny negative bound.
  return Math.max(0, (center - margin) / denom);
}

/**
 * computeSkillUtility: pure. entries → { baseline, totalPos, totalNeg, min, skills[] }.
 *
 * Only entries whose `type` is in POSITIVE_SIGNAL_TYPES or
 * NEGATIVE_SIGNAL_TYPES are scored. Each scored entry counts once per distinct
 * string slug in its `active_skills` array.
 *
 * @param {object[]} entries - journal entries; non-objects are skipped
 * @param {{min?: number}} [opts] - `min` is the low-sample threshold
 *   (falls back to DEFAULT_MIN_SAMPLE when not a finite number)
 * @returns {{baseline: number, totalPos: number, totalNeg: number, min: number, skills: object[]}}
 *   `skills` is sorted worst-first (lowest wLB, then most negatives, then slug).
 */
export function computeSkillUtility(entries, opts = {}) {
  const min = Number.isFinite(opts.min) ? opts.min : DEFAULT_MIN_SAMPLE;
  const per = new Map();
  let totalPos = 0;
  let totalNeg = 0;

  for (const e of entries || []) {
    if (!e || typeof e !== "object") continue;
    const isPos = POSITIVE_SIGNAL_TYPES.has(e.type);
    const isNeg = NEGATIVE_SIGNAL_TYPES.has(e.type);
    if (!isPos && !isNeg) continue;
    if (isPos) totalPos++;
    else totalNeg++;

    const sev = isNeg ? entrySeverity(e) : 0;
    const ts = typeof e.ts === "string" ? e.ts : null;
    // Deduplicate: a slug repeated within one event must still count that
    // event once, otherwise pos/neg stop being event counts.
    const slugs = Array.isArray(e.active_skills) ? new Set(e.active_skills) : [];

    for (const slug of slugs) {
      if (!slug || typeof slug !== "string") continue;
      let s = per.get(slug);
      if (!s) {
        s = { pos: 0, neg: 0, sevNeg: 0, negTypes: {}, recent_ts: null };
        per.set(slug, s);
      }
      if (isPos) {
        s.pos++;
      } else {
        s.neg++;
        s.sevNeg += sev;
        s.negTypes[e.type] = (s.negTypes[e.type] || 0) + 1;
      }
      // Timestamps are compared as strings, as the journal stores them.
      if (ts && (!s.recent_ts || ts > s.recent_ts)) s.recent_ts = ts;
    }
  }

  const scored = totalPos + totalNeg;
  const baseline = scored ? totalPos / scored : 0;

  const skills = [];
  for (const [slug, s] of per.entries()) {
    const n = s.pos + s.neg;
    const share = n ? s.pos / n : 0;
    const topNeg = Object.entries(s.negTypes).sort((a, b) => b[1] - a[1])[0];
    skills.push({
      skill: slug,
      n,
      pos: s.pos,
      neg: s.neg,
      share: round(share),
      lift: round(share - baseline),
      wLB: round(wilsonLower(s.pos, n)),
      sevNeg: s.sevNeg,
      topNeg: topNeg ? topNeg[0] : null,
      lowSample: n < min,
      recent_ts: s.recent_ts,
    });
  }

  // Worst-first: lowest Wilson lower bound, then most negatives, then slug
  // so the order is fully deterministic.
  skills.sort(
    (a, b) =>
      a.wLB - b.wLB ||
      b.neg - a.neg ||
      (a.skill < b.skill ? -1 : a.skill > b.skill ? 1 : 0),
  );

  return { baseline: round(baseline), totalPos, totalNeg, min, skills };
}

/**
 * Parse CLI flags. Unknown flags, and value flags given as the final argument
 * with no value after them, are ignored. `--min` / `--days` are converted with
 * Number(), so a non-numeric value yields NaN (handled downstream).
 *
 * @param {string[]} argv - arguments after the script path
 * @returns {{home: ?string, input: ?string, min: number, days: ?number, json: boolean, help: boolean}}
 */
function parseArgs(argv) {
  const args = {
    home: null,
    input: null,
    min: DEFAULT_MIN_SAMPLE,
    days: null,
    json: false,
    help: false,
  };
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === "--json") {
      args.json = true;
    } else if (flag === "--help" || flag === "-h") {
      args.help = true;
    } else if (i + 1 < argv.length) {
      if (flag === "--home") args.home = argv[++i];
      else if (flag === "--input") args.input = argv[++i];
      else if (flag === "--min") args.min = Number(argv[++i]);
      else if (flag === "--days") args.days = Number(argv[++i]);
    }
  }
  return args;
}

/**
 * Read all of stdin (fd 0) synchronously.
 * @returns {string} the piped text, or "" when stdin cannot be read
 */
function readAllStdin() {
  try {
    return readFileSync(0, "utf8");
  } catch {
    // Best-effort: an unreadable stdin is treated the same as an empty one.
    return "";
  }
}

/**
 * Parse JSONL text into entries, silently skipping blank or malformed lines.
 * @param {string} text
 * @returns {Array} parsed values, one per valid line
 */
function entriesFromText(text) {
  const out = [];
  for (const line of (text || "").split("\n")) {
    if (!line) continue;
    try {
      out.push(JSON.parse(line));
    } catch {
      /* skip malformed line */
    }
  }
  return out;
}

// Same gathering strategy as adam-score.mjs: explicit --input, else piped
// stdin (e.g. from adam-window.mjs), else the active journal + rotated files.
/**
 * @param {{input: ?string, home: ?string}} args - parsed CLI arguments
 * @returns {Array} journal entries from the first source that applies
 */
function gatherInputEntries(args) {
  if (args.input) return readJsonlSafe(args.input);

  if (!process.stdin.isTTY) {
    const piped = readAllStdin();
    if (piped && piped.trim()) return entriesFromText(piped);
  }

  const home = args.home || join(homedir(), ".claude");
  const adamRoot = join(home, "adam");
  const sources = [
    join(adamRoot, "journal.jsonl"),
    ...listJsonlFiles(join(adamRoot, "journal")),
  ];
  const all = [];
  for (const p of sources) {
    for (const e of readJsonlSafe(p)) all.push(e);
  }
  return all;
}

/**
 * Parse an entry's `ts` string to epoch milliseconds.
 * @param {*} e - candidate journal entry
 * @returns {number} epoch ms, or NaN when `ts` is missing or unparseable
 */
function entryTimeMs(e) {
  return e && typeof e.ts === "string" ? Date.parse(e.ts) : NaN;
}

/**
 * Keep only entries within `days` of the newest timestamp in the data.
 * Anchoring to the newest ts (instead of Date.now()) keeps the result
 * deterministic and makes the window work on historical exports.
 *
 * @param {Array} entries
 * @param {?number} days - window size; non-finite or <= 0 disables filtering
 * @returns {Array} the input array unchanged when filtering is disabled or no
 *   entry has a parseable ts; otherwise a new array. Entries without a
 *   parseable ts are dropped when the filter is active.
 */
function filterByDays(entries, days) {
  if (!Number.isFinite(days) || days <= 0) return entries;

  // -Infinity (not 0) as the "nothing seen" sentinel, so data whose newest
  // timestamp is at or before the Unix epoch is still windowed correctly.
  let newestMs = -Infinity;
  for (const e of entries) {
    const ms = entryTimeMs(e);
    if (Number.isFinite(ms) && ms > newestMs) newestMs = ms;
  }
  if (!Number.isFinite(newestMs)) return entries;

  const cutoff = newestMs - days * MS_PER_DAY;
  return entries.filter((e) => {
    const ms = entryTimeMs(e);
    return Number.isFinite(ms) && ms >= cutoff;
  });
}

/**
 * Left-align: pad `s` with trailing spaces to at least width `w`.
 * @param {*} s - value to render (stringified)
 * @param {number} w
 * @returns {string}
 */
function pad(s, w) {
  return String(s).padEnd(w);
}

/**
 * Right-align: pad `s` with leading spaces to at least width `w`.
 * @param {*} s - value to render (stringified)
 * @param {number} w
 * @returns {string}
 */
function padL(s, w) {
  return String(s).padStart(w);
}

/**
 * Render a report from computeSkillUtility as a fixed-width text table.
 * @param {{baseline: number, totalPos: number, totalNeg: number, min: number, skills: object[]}} report
 * @returns {string} multi-line text without a trailing newline
 */
function renderText(report) {
  const { baseline, totalPos, totalNeg, min, skills } = report;
  const lines = [];
  lines.push("adam skill-utility report — execution-grounded Δ(skill) proxy");
  lines.push(
    `baseline positive-rate ${(baseline * 100).toFixed(1)}% ` +
      `(${totalPos} positive / ${totalNeg} negative outcome events) min-sample n≥${min}`,
  );
  lines.push("CAVEAT: co-occurrence, not causation. Worst-first. ⚠ = below baseline with n≥min.");
  lines.push("");

  const head =
    pad("skill", 44) +
    padL("n", 5) +
    padL("pos", 6) +
    padL("neg", 6) +
    padL("share", 8) +
    padL("lift", 8) +
    padL("wLB", 7) +
    padL("sevNeg", 8) +
    " " +
    pad("topNeg", 18) +
    "flag";
  lines.push(head);
  lines.push("-".repeat(head.length));

  for (const s of skills) {
    const below = s.lift < 0 && !s.lowSample;
    const flag = below ? "⚠" : s.lowSample ? "·(low n)" : "";
    const liftSign = s.lift >= 0 ? "+" : "";
    lines.push(
      pad(s.skill, 44) +
        padL(s.n, 5) +
        padL(s.pos, 6) +
        padL(s.neg, 6) +
        padL(`${(s.share * 100).toFixed(0)}%`, 8) +
        padL(`${liftSign}${(s.lift * 100).toFixed(0)}%`, 8) +
        padL(s.wLB.toFixed(2), 7) +
        // sevNeg is a floating-point sum; round it for display so it cannot
        // overflow its column. The JSON report keeps the unrounded value.
        padL(round(s.sevNeg), 8) +
        " " +
        pad(s.topNeg || "-", 18) +
        flag,
    );
  }
  return lines.join("\n");
}

/**
 * CLI entry point: gather entries, apply the --days window, compute the report
 * and print it as text or JSON. Sets process.exitCode instead of calling
 * process.exit(), so pending stdout/stderr writes are flushed before exit.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    process.stdout.write(
      "usage: adam-skill-utility.mjs [--home DIR] [--input FILE] " +
        "[--min N] [--days N] [--json]\n",
    );
    return;
  }
  try {
    const entries = filterByDays(gatherInputEntries(args), args.days);
    const report = computeSkillUtility(entries, { min: args.min });
    const output = args.json ? JSON.stringify(report) : renderText(report);
    process.stdout.write(`${output}\n`);
  } catch (e) {
    process.stderr.write(`adam-skill-utility error: ${e?.message ?? e}\n`);
    process.exitCode = 1;
  }
}

/**
 * True when this module is the script Node was launched with. Compares real
 * file URLs, so paths with spaces or other percent-encoded characters, Windows
 * paths and symlinked launchers are all recognised.
 * @returns {boolean}
 */
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) return false;
  const candidates = [entry];
  try {
    candidates.push(realpathSync(entry));
  } catch {
    // Entry path is not resolvable on disk; compare the path as given.
  }
  return candidates.some((p) => pathToFileURL(p).href === import.meta.url);
}

if (isMainModule()) {
  main();
}