claude-adam/adam/scripts/adam-skill-utility.mjs

#!/usr/bin/env node
// adam-skill-utility.mjs — execution-grounded per-skill utility report.
//
// Inspired by SkillsInjector (arXiv 2605.29794v1), which shows skill injection
// should be driven by execution-grounded *utility* Δ(t,s), not surface keyword
// match — and that some topically-relevant skills actively *lower* success.
// The paper learns Δ(t,s) from rollout outcomes. We don't train anything: the
// adam journal already attaches `active_skills` to both positive outcome events
// (task_completed, clean_recovery, correction_free_streak) and negative ones
// (dead_end, tool_error_loop, …). So we approximate Δ(s) as a co-occurrence
// ratio over the data we already collect.
//
// CAVEAT (honest): this is CO-OCCURRENCE, not causation. A skill active during
// a dead_end did not necessarily cause it. Read the report as "which skills
// correlate with friction", a prompt for review — never as proof.
//
// Metric, per skill active during scored events:
//   pos / neg     — count of positive / negative outcome events it co-occurred with
//   share         — pos / (pos+neg)
//   lift          — share − global_baseline   (>0 above baseline, <0 below)
//   wLB           — Wilson 95% lower bound of the positive proportion; ranks
//                   *reliably* below-baseline skills to the top (sample-aware)
//   sevNeg        — severity-weighted negative sum (adam SEVERITY_DIVISORS)
//   topNeg        — dominant negative event type
// Rows sorted worst-first (lowest wLB) so harmful/over-eager skills surface.
//
// CLI:
//   adam-skill-utility.mjs [--home <path>] [--input <jsonl-path>]
//                          [--min <n>] [--days <n>] [--json]
//   --min   min event count (n) to treat a skill's signal as confident (default 8)
//   --days  only consider events within <n> days of the newest event timestamp
//           found in the input (not wall-clock "now"); default: all
//   --json  emit machine-readable JSON instead of the text table
//
// Reuses adam-utils (jsonl IO) and adam-score (canonical NEGATIVE set +
// severity), so the positive/negative taxonomy stays single-sourced.

import { readFileSync } from "node:fs";
import { join } from "node:path";
import { homedir } from "node:os";
import { pathToFileURL } from "node:url";
import { readJsonlSafe, listJsonlFiles } from "./adam-utils.mjs";
import { NEGATIVE_SIGNAL_TYPES, entrySeverity } from "./adam-score.mjs";

// Positive outcome signals (mirror adam's vocabulary; task_completed is adam's
// canonical "clean task", the same one adam-score uses for reinforcement).
// computeSkillUtility treats an event as a positive outcome when its `type`
// is a member of this set; negative outcomes come from adam-score's
// NEGATIVE_SIGNAL_TYPES, and events whose type is in neither set are skipped.
export const POSITIVE_SIGNAL_TYPES = new Set([
  "task_completed",
  "clean_recovery",
  "correction_free_streak",
]);

// Default confidence threshold: a skill seen in fewer than this many scored
// events is marked `lowSample` in the report. Used when neither --min nor
// opts.min supplies a finite number.
export const DEFAULT_MIN_SAMPLE = 8;

// Round a number to three decimal places (the precision used for the
// baseline, share, lift and wLB values in the report).
function round(x) {
  const thousandths = Math.round(x * 1000);
  return thousandths / 1000;
}

/**
 * Wilson score interval lower bound for a binomial proportion. Sample-aware:
 * a skill with 1 pos / 0 neg does NOT outrank one with 40 pos / 2 neg.
 *
 * The result is always a finite number in [0, 1], so it is safe to use as a
 * sort key: malformed counts can never surface as NaN.
 *
 * @param {number} pos - number of positive observations.
 * @param {number} n - total number of observations.
 * @param {number} [z=1.96] - normal quantile for the confidence level
 *   (1.96 ≈ 95%).
 * @returns {number} the lower bound; 0 when there are no observations
 *   (n <= 0) or when `pos` / `n` is not a finite number.
 */
export function wilsonLower(pos, n, z = 1.96) {
  if (!Number.isFinite(pos) || !Number.isFinite(n) || n <= 0) return 0;
  // Clamp the observed proportion: with pos > n (or pos < 0) the term
  // p * (1 - p) goes negative and Math.sqrt would return NaN.
  const p = Math.min(1, Math.max(0, pos / n));
  const z2 = z * z;
  const denom = 1 + z2 / n;
  const center = p + z2 / (2 * n);
  const margin = z * Math.sqrt((p * (1 - p) + z2 / (4 * n)) / n);
  // At p = 0, center and margin are mathematically equal; clamp so that
  // floating-point cancellation cannot leave a tiny negative bound.
  return Math.min(1, Math.max(0, (center - margin) / denom));
}

/**
 * computeSkillUtility: pure. entries → { baseline, totalPos, totalNeg, min, skills[] }.
 *
 * An entry is scored when its `type` is in POSITIVE_SIGNAL_TYPES or
 * NEGATIVE_SIGNAL_TYPES; everything else (including non-object entries) is
 * skipped. Each scored entry contributes once to the global totals and once
 * to every distinct non-empty string slug in its `active_skills` array.
 *
 * @param {Array<object>|null|undefined} entries - journal events.
 * @param {{min?: number}|null} [opts] - `min` is the event count below which
 *   a skill row is marked `lowSample`; falls back to DEFAULT_MIN_SAMPLE when
 *   it is not a finite number.
 * @returns {{baseline: number, totalPos: number, totalNeg: number, min: number, skills: Array<object>}}
 *   `baseline` is the global positive rate (0 when nothing was scored).
 *   Each skills row carries: skill, n, pos, neg, share, lift (share − baseline),
 *   wLB (Wilson lower bound of the positive proportion), sevNeg (sum of
 *   entrySeverity over its negative events), topNeg (most frequent negative
 *   type, or null), lowSample (n < min) and recent_ts. Ratios and sevNeg are
 *   rounded to three decimals. Rows are sorted worst-first.
 */
export function computeSkillUtility(entries, opts = {}) {
  // `opts?.` tolerates an explicit null, which the default parameter does not cover.
  const min = Number.isFinite(opts?.min) ? opts.min : DEFAULT_MIN_SAMPLE;
  const per = new Map();
  let totalPos = 0;
  let totalNeg = 0;

  for (const e of entries || []) {
    if (!e || typeof e !== "object") continue;
    const isPos = POSITIVE_SIGNAL_TYPES.has(e.type);
    const isNeg = NEGATIVE_SIGNAL_TYPES.has(e.type);
    if (!isPos && !isNeg) continue;

    if (isPos) totalPos++;
    else totalNeg++;
    const sev = isNeg ? entrySeverity(e) : 0;
    const ts = typeof e.ts === "string" ? e.ts : null;

    // Count each skill at most once per event: pos/neg are defined as the
    // number of *events* a skill co-occurred with, so a slug repeated inside
    // one active_skills array must not inflate the tallies.
    const listed = Array.isArray(e.active_skills) ? e.active_skills : [];
    const slugs = new Set(listed.filter((slug) => slug && typeof slug === "string"));

    for (const slug of slugs) {
      let s = per.get(slug);
      if (!s) {
        s = { pos: 0, neg: 0, sevNeg: 0, negTypes: {}, recent_ts: null };
        per.set(slug, s);
      }
      if (isPos) {
        s.pos++;
      } else {
        s.neg++;
        s.sevNeg += sev;
        s.negTypes[e.type] = (s.negTypes[e.type] || 0) + 1;
      }
      // String comparison: picks the latest timestamp provided all `ts`
      // values share one lexicographically sortable format.
      // NOTE(review): presumably ISO-8601 UTC — confirm against the journal writer.
      if (ts && (!s.recent_ts || ts > s.recent_ts)) s.recent_ts = ts;
    }
  }

  const scored = totalPos + totalNeg;
  const baseline = scored ? totalPos / scored : 0;

  const skills = [];
  for (const [slug, s] of per.entries()) {
    const n = s.pos + s.neg;
    const share = n ? s.pos / n : 0;
    // Most frequent negative type; ties keep first-seen order (stable sort).
    const topNeg = Object.entries(s.negTypes).sort((a, b) => b[1] - a[1])[0];
    skills.push({
      skill: slug,
      n,
      pos: s.pos,
      neg: s.neg,
      share: round(share),
      lift: round(share - baseline),
      wLB: round(wilsonLower(s.pos, n)),
      // Rounded like the other metrics so accumulated float error
      // (e.g. 0.30000000000000004) does not leak into the JSON or the table.
      sevNeg: round(s.sevNeg),
      topNeg: topNeg ? topNeg[0] : null,
      lowSample: n < min,
      recent_ts: s.recent_ts,
    });
  }
  // Worst-first: lowest Wilson lower bound, then most negatives, then slug
  // order so the output is deterministic.
  skills.sort(
    (a, b) =>
      a.wLB - b.wLB ||
      b.neg - a.neg ||
      (a.skill < b.skill ? -1 : a.skill > b.skill ? 1 : 0),
  );

  return { baseline: round(baseline), totalPos, totalNeg, min, skills };
}

// Turn the CLI argument vector into an options record. The value-taking
// flags (--home, --input, --min, --days) consume the token that follows them;
// a value flag in last position, and any unrecognised token, is ignored.
// --min and --days are converted with Number(), so non-numeric text yields NaN.
function parseArgs(argv) {
  const parsed = {
    home: null,
    input: null,
    min: DEFAULT_MIN_SAMPLE,
    days: null,
    json: false,
    help: false,
  };

  let idx = 0;
  while (idx < argv.length) {
    const flag = argv[idx];
    const hasValue = idx + 1 < argv.length;
    switch (flag) {
      case "--home":
        if (hasValue) parsed.home = argv[++idx];
        break;
      case "--input":
        if (hasValue) parsed.input = argv[++idx];
        break;
      case "--min":
        if (hasValue) parsed.min = Number(argv[++idx]);
        break;
      case "--days":
        if (hasValue) parsed.days = Number(argv[++idx]);
        break;
      case "--json":
        parsed.json = true;
        break;
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}

// Read everything available on file descriptor 0 as UTF-8 text. Best-effort:
// any read failure is reported as an empty string rather than thrown.
function readAllStdin() {
  let text = "";
  try {
    text = readFileSync(0, "utf8");
  } catch {
    // Deliberately ignored; the caller treats "" as "nothing was piped".
  }
  return text;
}

// Parse newline-delimited JSON text into an array of values. Empty lines and
// lines that are not valid JSON are dropped; a falsy `text` yields [].
function entriesFromText(text) {
  const parsed = [];
  const rows = (text || "").split("\n");
  rows.forEach((row) => {
    if (row === "") return;
    try {
      parsed.push(JSON.parse(row));
    } catch {
      /* skip */
    }
  });
  return parsed;
}

// Resolve the event stream, following the same precedence as adam-score.mjs:
//   1. --input <path>        → that JSONL file only
//   2. non-empty piped stdin → parsed line by line (e.g. from adam-window.mjs)
//   3. otherwise             → <home>/adam/journal.jsonl (the active journal)
//                              followed by the rotated files that
//                              listJsonlFiles returns for <home>/adam/journal
// <home> is --home when given, else ~/.claude.
function gatherInputEntries(args) {
  if (args.input) {
    return readJsonlSafe(args.input);
  }

  const stdinIsPiped = !process.stdin.isTTY;
  if (stdinIsPiped) {
    const text = readAllStdin();
    if (text && text.trim()) {
      return entriesFromText(text);
    }
  }

  const claudeHome = args.home || join(homedir(), ".claude");
  const root = join(claudeHome, "adam");
  const journalPaths = [
    join(root, "journal.jsonl"),
    ...listJsonlFiles(join(root, "journal")),
  ];

  const collected = [];
  journalPaths.forEach((journalPath) => {
    for (const entry of readJsonlSafe(journalPath)) {
      collected.push(entry);
    }
  });
  return collected;
}

// Restrict `entries` to a trailing window of `days` days.
// The window ends at the newest parseable `ts` found in the data rather than
// at Date.now(), which keeps the result deterministic and usable on
// historical exports. The input array is returned untouched when `days` is
// not a positive finite number or when no entry has a parseable timestamp;
// otherwise entries without a parseable `ts` are dropped.
function filterByDays(entries, days) {
  const windowRequested = Number.isFinite(days) && days > 0;
  if (!windowRequested) return entries;

  const toMs = (entry) =>
    entry && typeof entry.ts === "string" ? Date.parse(entry.ts) : NaN;

  // Parse every timestamp once, remembering the newest one seen.
  const stamps = [];
  let newest = 0;
  for (const entry of entries) {
    const ms = toMs(entry);
    stamps.push(ms);
    if (Number.isFinite(ms) && ms > newest) newest = ms;
  }
  if (newest === 0) return entries;

  const cutoff = newest - days * 86400000;
  return entries.filter((_, i) => {
    const ms = stamps[i];
    return Number.isFinite(ms) && ms >= cutoff;
  });
}

// Left-align: stringify `s` and pad it on the right with spaces up to width
// `w`. Values already at least `w` characters long are returned unshortened.
function pad(s, w) {
  return String(s).padEnd(w);
}
// Right-align: stringify `s` and pad it on the left with spaces up to width
// `w`. Values already at least `w` characters long are returned unshortened.
function padL(s, w) {
  return String(s).padStart(w);
}

// Render a computeSkillUtility report as a fixed-width text table: three
// summary lines, a blank line, a header, a dashed rule, then one row per skill
// in the order given. The flag column shows "·(low n)" for low-sample rows and
// "⚠" for rows below baseline (lift < 0) that are not low-sample.
function renderText(report) {
  const { baseline, totalPos, totalNeg, min, skills } = report;

  const asPercent = (ratio) => `${(ratio * 100).toFixed(0)}%`;
  const asSignedPercent = (ratio) => `${ratio >= 0 ? "+" : ""}${asPercent(ratio)}`;

  const summary = [
    `baseline positive-rate ${(baseline * 100).toFixed(1)}%  `,
    `(${totalPos} positive / ${totalNeg} negative outcome events)  min-sample n≥${min}`,
  ].join("");

  const header = [
    pad("skill", 44),
    padL("n", 5),
    padL("pos", 6),
    padL("neg", 6),
    padL("share", 8),
    padL("lift", 8),
    padL("wLB", 7),
    padL("sevNeg", 8),
    "  ",
    pad("topNeg", 18),
    "flag",
  ].join("");

  const rows = skills.map((row) => {
    let flag = "";
    if (row.lowSample) {
      flag = "·(low n)";
    } else if (row.lift < 0) {
      flag = "⚠";
    }
    return [
      pad(row.skill, 44),
      padL(row.n, 5),
      padL(row.pos, 6),
      padL(row.neg, 6),
      padL(asPercent(row.share), 8),
      padL(asSignedPercent(row.lift), 8),
      padL(row.wLB.toFixed(2), 7),
      padL(row.sevNeg, 8),
      "  ",
      pad(row.topNeg || "-", 18),
      flag,
    ].join("");
  });

  return [
    "adam skill-utility report — execution-grounded Δ(skill) proxy",
    summary,
    "CAVEAT: co-occurrence, not causation. Worst-first. ⚠ = below baseline with n≥min.",
    "",
    header,
    "-".repeat(header.length),
    ...rows,
  ].join("\n");
}

/**
 * CLI entry point: parse flags, gather journal entries, apply the --days
 * window, compute the per-skill report and print it (JSON with --json,
 * otherwise the text table).
 *
 * Exit status is communicated through process.exitCode (0 on success and for
 * --help, 1 on error) instead of process.exit(): process.exit() terminates
 * immediately and can discard stdout/stderr data that has not been flushed
 * yet (writes are asynchronous for some stream types, e.g. pipes on Windows),
 * which would truncate a report piped into another tool.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    process.stdout.write(
      "usage: adam-skill-utility.mjs [--home <path>] [--input <jsonl-path>] " +
        "[--min <n>] [--days <n>] [--json]\n",
    );
    return;
  }
  try {
    const entries = filterByDays(gatherInputEntries(args), args.days);
    const report = computeSkillUtility(entries, { min: args.min });
    const body = args.json ? JSON.stringify(report) : renderText(report);
    process.stdout.write(body + "\n");
  } catch (e) {
    // Non-Error throwables have no .message; fall back to their string form.
    const reason = e instanceof Error ? e.message : String(e);
    process.stderr.write(`adam-skill-utility error: ${reason}\n`);
    process.exitCode = 1;
  }
}

// Run main() only when this file is the script node was launched with, not
// when it is imported as a module. The script path is converted with
// pathToFileURL so it is made absolute, percent-encoded and
// drive-letter/backslash-normalised the same way import.meta.url is; a raw
// `file://${path}` string never matches on Windows or for paths containing
// spaces or other characters that URLs escape, which silently disabled the CLI.
// process.argv[1] is checked first because pathToFileURL throws on undefined.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}