feat(v0.6.0): review hardening — live active_skills clustering, computable fingerprints

Full codebase review (multi-agent, adversarially verified) surfaced several documented-but-dead mechanisms and doc/code drift.

Fixes:
- adam-observe: struggle signals now emit `active_skills`, so silent_drift's primary cluster key AND §5b skill-attribution sub-clustering (+1 rubric bonus) actually fire — both were silently dead (no struggle signal carried the field).
- adam-cooldown: new `--compute` CLI deterministically derives proposal_fingerprint. The exported computeProposalFingerprint() was never called and the analyst was told to hand-compute a djb2 hash it cannot reproduce. Spec now mandates a *stable* cluster id so fingerprints reproduce across /reflect runs. Removed one dead normalization line.
- spec: reinforcement proposals excluded from A/B tracking — agents/adam.md contradicted itself (:376 included, :476 excluded); SKILL.md aligned.
- adam-nudge: PENDING_CHECK_PATHS now mirrors the full install set (adam-utils / adam-batch / adam-rollback were missing).
- adam-explain: synthesized clustering summary carries `regressions: 0` (structural consistency with parsed summaries).
- docs: test-count drift (87/94 -> 126) and "350-line hook" (-> ~600) fixed; adam-score header documents severity_sum/severity_by_type; adam-batch §4 reference corrected.

Tests: +12 assertions (114 -> 126), all green. New regression tests cover the active_skills fix and --compute, plus boundary gaps the review flagged: retry_loop/weak_agent thresholds, A/B exact +/-25% deltas, cooldown 30d blacklist edge.
2026-06-11 23:29:35 +00:00 · 2026-05-29 01:57:44 +01:00
parent 2d9257922f
commit 4b36d6c09e
10 changed files with 219 additions and 20 deletions
@@ -4,7 +4,7 @@
 // automatically curated batch of production-failure evidence."
 //
 // Each batch groups entries by (signal_type, cluster_key) where cluster_key
-// follows the same clustering rules as agents/adam.md §4:
+// follows the same clustering rules as agents/adam.md ## Signal types / ## Process step 4:
 //   correction     → tokenized phrase (cross-cwd)
 //   retry_loop     → tool
 //   weak_agent     → subagent_type
@@ -4,8 +4,12 @@
 //
 // CLI:
 //   adam-cooldown.mjs --skill <slug> --fingerprint <hash> [--home <path>]
+//   adam-cooldown.mjs --compute --skill <slug> --cluster <id> [--diff-file <path>]
+//     → prints {"fingerprint":"<djb2_base36>"}; diff body read from --diff-file
+//       or stdin. This is how proposal_fingerprint is populated (the analyst
+//       runs it via Bash after drafting a proposal).
 //
-// Output: JSON one-liner with shape
+// Output (gate mode): JSON one-liner with shape
 //   { "status": "cool"|"cooldown"|"blacklisted",
 //     "reason": "<human-readable reason>",
 //     "blocked_by": { "file": "<basename>", "days_remaining": <int> } | null }
@@ -33,12 +37,15 @@ const DAY_MS = 86400000;
 export const LEGACY_FINGERPRINT = "legacy";

 function parseArgs(argv) {
-  const args = { home: null, skill: null, fingerprint: null, help: false };
+  const args = { home: null, skill: null, fingerprint: null, compute: false, cluster: null, diffFile: null, help: false };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--home" && i + 1 < argv.length) args.home = argv[++i];
    else if (a === "--skill" && i + 1 < argv.length) args.skill = argv[++i];
    else if (a === "--fingerprint" && i + 1 < argv.length) args.fingerprint = argv[++i];
+    else if (a === "--cluster" && i + 1 < argv.length) args.cluster = argv[++i];
+    else if (a === "--diff-file" && i + 1 < argv.length) args.diffFile = argv[++i];
+    else if (a === "--compute") args.compute = true;
    else if (a === "--help" || a === "-h") args.help = true;
  }
  return args;
@@ -158,9 +165,11 @@ export function computeProposalFingerprint(proposal) {
  if (!proposal || typeof proposal !== "object") return LEGACY_FINGERPRINT;
  const skill = proposal.skill_slug || proposal.target_skill || proposal.skill || "";
  const cluster = proposal.signal_cluster_id || proposal.cluster_id || "";
+  // normalized_diff_body: whitespace (incl. newlines) collapsed to single
+  // spaces, then trimmed. Matches agents/adam.md §"Per-(skill, fingerprint)
+  // cooldown". (No trailing-newline strip needed — \s+ already absorbed them.)
  const diff = String(proposal.diff_body || proposal.proposed_change || "")
    .replace(/\s+/g, " ")
-    .replace(/\n+$/g, "")
    .trim();
  return djb2(`${skill}\n${cluster}\n${diff}`);
 }
@@ -168,7 +177,28 @@ export function computeProposalFingerprint(proposal) {
 function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
-    process.stdout.write("usage: adam-cooldown.mjs --skill <slug> --fingerprint <hash> [--home <path>]\n");
+    process.stdout.write(
+      "usage: adam-cooldown.mjs --skill <slug> --fingerprint <hash> [--home <path>]\n" +
+      "       adam-cooldown.mjs --compute --skill <slug> --cluster <id> [--diff-file <path>]\n"
+    );
+    process.exit(0);
+  }
+  // --compute: deterministically derive a proposal_fingerprint. The analyst
+  // invokes this (it has Bash) after drafting a proposal, then writes the
+  // result into proposal frontmatter so the cooldown gate keys on it.
+  if (args.compute) {
+    let diff = "";
+    if (args.diffFile) {
+      try { diff = readFileSync(args.diffFile, "utf8"); } catch { /* empty → still deterministic */ }
+    } else {
+      try { diff = readFileSync(0, "utf8"); } catch { /* no stdin */ }
+    }
+    const fp = computeProposalFingerprint({
+      skill_slug: args.skill || "",
+      signal_cluster_id: args.cluster || "",
+      diff_body: diff,
+    });
+    process.stdout.write(JSON.stringify({ fingerprint: fp }) + "\n");
    process.exit(0);
  }
  if (!args.skill || !args.fingerprint) {
@@ -135,6 +135,7 @@ export function parseTrace(text) {
      considered: clusters.length,
      emitted,
      skipped: clusters.length - emitted,
+      regressions: 0,
      reasons,
    };
  }
@@ -23,7 +23,8 @@
 // Output: JSON object
 //   {
 //     "sessions": [
-//       {"session_id": "...", "negative_count": N, "task_completed_count": M, "dampener": 1.0}
+//       {"session_id": "...", "negative_count": N, "task_completed_count": M,
+//        "severity_sum": S, "severity_by_type": {"<type>": N, ...}, "dampener": 1.0}
 //     ],
 //     "reinforcement_candidates": [
 //       {"skill_slug": "tdd-loop", "count": 3, "recent_ts": "..."}