claude-adam/adam/scripts/adam-batch.mjs

#!/usr/bin/env node
// adam-batch.mjs — pre-clusters windowed journal entries into coherent failure
// batches before analyst dispatch. Implements MOSS §3.1: "anchored to an
// automatically curated batch of production-failure evidence."
//
// Each batch groups entries by (signal_type, cluster_key) where cluster_key
// follows the same clustering rules as agents/adam.md ## Signal types / ## Process step 4:
//   correction     → tokenized phrase (cross-cwd)
//   retry_loop     → tool
//   weak_agent     → subagent_type
//   tool_error_loop→ fp
//   dead_end       → session
//   edit_churn     → file basename
//   build_loop     → session
//   subagent_dispatch_pattern → subagent_type
//   silent_drift   → active_skills[0]
//   error_after_recovery → (recovered_from, original_fp)
//   correction_free_streak → active_skills[0]
//   clean_recovery → (recovered_from, active_skills[0])
//   task_completed → sorted tool_kinds tuple
//
// CLI:
//   adam-batch.mjs [--input <jsonl-path>] [--min-entries N] [--min-sessions N]
//
// Output: JSON object with a `batches` array, an `unbatched` entry count, and
// a `total` count of all input entries.

import { readFileSync } from "node:fs";
import { pathToFileURL } from "node:url";
import { readJsonlSafe } from "./adam-utils.mjs";

// Fallback thresholds used by parseArgs and buildBatches when the caller does
// not supply a usable value.
// Minimum number of entries a cluster must hold to be emitted as a batch.
const DEFAULT_MIN_ENTRIES = 1;
// Minimum number of distinct `session` values a cluster must span to be
// emitted as a batch.
const DEFAULT_MIN_SESSIONS = 1;

// Words dropped from a correction phrase before it is turned into a cluster
// key, so that phrases differing only in filler words land in the same batch.
const CORRECTION_STOPWORDS = new Set([
  // articles and conjunctions
  "the", "a", "an",
  "and", "or", "but",
  // prepositions, plus the filler verb "use"
  "of", "to", "for", "in", "on", "with",
  "use",
  // question words and demonstratives
  "when", "where", "what", "why", "how",
  "this", "that", "these", "those",
  // forms of "be", "do" and "have"
  "is", "are", "was", "were", "be", "been", "being",
  "do", "does", "did", "doing",
  "has", "have", "had",
  // pronouns
  "your", "you", "i", "it",
  // remaining prepositions and negations
  "as", "at", "by", "from",
  "not", "no",
]);

/**
 * Reduce a correction phrase to an order-independent token signature.
 *
 * The phrase is lower-cased and split on whitespace; each piece has its
 * leading and trailing characters stripped when they are neither word
 * characters nor apostrophes. Empty pieces and stopwords are discarded, and
 * the surviving tokens (duplicates included) are sorted and joined with "|".
 *
 * @param {unknown} phrase - Raw correction text; anything that is not a
 *   non-empty string yields "".
 * @returns {string} The "|"-joined sorted tokens, or "" when nothing survives.
 */
function tokenizePhrase(phrase) {
  if (typeof phrase !== "string" || phrase === "") return "";

  const kept = [];
  for (const piece of phrase.toLowerCase().split(/\s+/)) {
    // Trim surrounding punctuation but keep inner characters and apostrophes.
    const word = piece.replace(/^[^\w']+|[^\w']+$/g, "");
    if (word !== "" && !CORRECTION_STOPWORDS.has(word)) {
      kept.push(word);
    }
  }

  // Sorting makes the signature independent of word order in the phrase.
  kept.sort();
  return kept.join("|");
}

/**
 * Compute the key that, together with the entry's `type`, decides which
 * batch a journal entry is grouped into.
 *
 * Key per signal type:
 *   correction                              → tokenized `phrase`
 *   retry_loop                              → `tool`
 *   weak_agent, subagent_dispatch_pattern   → `subagent_type`
 *   tool_error_loop                         → `fp`
 *   dead_end, build_loop                    → `session`
 *   edit_churn                              → last "/"-separated segment of `file`
 *   silent_drift, correction_free_streak    → first element of `active_skills`
 *   error_after_recovery                    → `recovered_from:original_fp`
 *   clean_recovery                          → `recovered_from:active_skills[0]`
 *   task_completed                          → sorted, comma-joined `tool_kinds`
 *   anything else                           → `session`
 *
 * Missing fields fall back to "unknown", "?" or "" depending on the branch,
 * exactly as spelled out below.
 *
 * @param {unknown} entry - One parsed journal entry.
 * @returns {string|null} The cluster key, or null when `entry` is not an object.
 */
function clusterKey(entry) {
  if (!entry || typeof entry !== "object") return null;
  const t = entry.type;
  switch (t) {
    case "correction":
      return tokenizePhrase(entry.phrase) || "unknown";
    case "retry_loop":
      return entry.tool || "unknown";
    case "weak_agent":
    case "subagent_dispatch_pattern":
      return entry.subagent_type || "unknown";
    case "tool_error_loop":
      return entry.fp || "unknown";
    case "dead_end":
    case "build_loop":
      return entry.session || "unknown";
    case "edit_churn":
      // Only split genuine strings. A malformed entry whose `file` is a
      // number, array or object has no usable basename and must not throw,
      // because one bad journal line would otherwise abort the whole run.
      return typeof entry.file === "string" && entry.file
        ? entry.file.split("/").pop()
        : "unknown";
    case "silent_drift":
    case "correction_free_streak":
      return Array.isArray(entry.active_skills) ? (entry.active_skills[0] || "") : "";
    case "error_after_recovery":
      return `${entry.recovered_from || "?"}:${entry.original_fp || "?"}`;
    case "clean_recovery":
      return `${entry.recovered_from || "?"}:${Array.isArray(entry.active_skills) ? (entry.active_skills[0] || "") : ""}`;
    case "task_completed":
      // Sort a copy so the caller's array is left untouched and the key does
      // not depend on the order the tool kinds were recorded in.
      return Array.isArray(entry.tool_kinds) ? entry.tool_kinds.slice().sort().join(",") : "unknown";
    default:
      return entry.session || "unknown";
  }
}

/**
 * Parse the command-line arguments of adam-batch.
 *
 * Recognized flags: `--input <path>`, `--min-entries N`, `--min-sessions N`,
 * `--help` / `-h`. A value-taking flag that is the last argument is ignored.
 * A threshold value is accepted only when it converts to a number greater
 * than zero; otherwise the default is kept. Unknown arguments are skipped.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{input: (string|null), minEntries: number, minSessions: number, help: boolean}}
 */
function parseArgs(argv) {
  const parsed = {
    input: null,
    minEntries: DEFAULT_MIN_ENTRIES,
    minSessions: DEFAULT_MIN_SESSIONS,
    help: false,
  };

  // Converts a raw flag value; null means "not a usable positive number".
  const toPositive = (raw) => {
    const value = Number(raw);
    return !Number.isNaN(value) && value > 0 ? value : null;
  };

  let idx = 0;
  while (idx < argv.length) {
    const flag = argv[idx];
    idx += 1;
    // After the increment, idx points at the candidate value for this flag.
    const hasValue = idx < argv.length;

    switch (flag) {
      case "--input":
        if (hasValue) {
          parsed.input = argv[idx];
          idx += 1;
        }
        break;
      case "--min-entries":
        if (hasValue) {
          const count = toPositive(argv[idx]);
          idx += 1;
          if (count !== null) parsed.minEntries = count;
        }
        break;
      case "--min-sessions":
        if (hasValue) {
          const count = toPositive(argv[idx]);
          idx += 1;
          if (count !== null) parsed.minSessions = count;
        }
        break;
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      default:
        break;
    }
  }

  return parsed;
}

/**
 * Group journal entries into batches keyed by (type, cluster key).
 *
 * Entries that are not objects or have no `type` are ignored. A cluster is
 * emitted as a batch only when it is not below the entry threshold and not
 * below the distinct-session threshold; otherwise its entries are added to
 * `unbatched`. Batch ids ("b1", "b2", ...) follow the order in which clusters
 * were first seen, and the final list is sorted by descending entry count.
 *
 * @param {Array<object>|null|undefined} entries - Parsed journal entries.
 * @param {{minEntries?: number, minSessions?: number}} [opts] - Thresholds;
 *   falsy values fall back to the module defaults.
 * @returns {{batches: object[], unbatched: number, total: number}} `total` is
 *   the length of the input, including ignored entries.
 */
export function buildBatches(entries, opts = {}) {
  const entryFloor = opts.minEntries || DEFAULT_MIN_ENTRIES;
  const sessionFloor = opts.minSessions || DEFAULT_MIN_SESSIONS;
  const source = entries || [];

  // Pass 1: bucket every usable entry under its "<type>::<cluster key>" id.
  const clusters = new Map();
  for (const entry of source) {
    const usable = entry && typeof entry === "object" && entry.type;
    if (!usable) continue;

    const cluster_key = clusterKey(entry);
    const bucketId = `${entry.type}::${cluster_key}`;
    let cluster = clusters.get(bucketId);
    if (cluster === undefined) {
      cluster = {
        signal_type: entry.type,
        cluster_key,
        members: [],
        sessions: new Set(),
        cwds: new Set(),
      };
      clusters.set(bucketId, cluster);
    }

    cluster.members.push(entry);
    if (entry.session) cluster.sessions.add(entry.session);
    if (entry.cwd) cluster.cwds.add(entry.cwd);
  }

  // Pass 2: keep clusters that clear both thresholds, count the rest.
  const batches = [];
  let unbatched = 0;
  for (const cluster of clusters.values()) {
    const entry_count = cluster.members.length;
    const session_count = cluster.sessions.size;
    const belowThreshold = entry_count < entryFloor || session_count < sessionFloor;
    if (belowThreshold) {
      unbatched += entry_count;
      continue;
    }

    const hasContext = (member) =>
      Array.isArray(member.context_window) && member.context_window.length > 0;

    batches.push({
      // Ids count emitted batches only, in first-seen order.
      batch_id: `b${batches.length + 1}`,
      signal_type: cluster.signal_type,
      cluster_key: cluster.cluster_key,
      entry_count,
      session_count,
      cwd_count: cluster.cwds.size,
      has_context_window: cluster.members.some(hasContext),
      entries: cluster.members,
    });
  }

  // Largest batches first.
  batches.sort((left, right) => right.entry_count - left.entry_count);
  return { batches, unbatched, total: source.length };
}

/**
 * CLI entry point: read journal entries, batch them, and print the result as
 * a single line of JSON on stdout.
 *
 * Input comes from `--input <path>` (via readJsonlSafe) or, when stdin is not
 * a TTY, from stdin parsed line by line as JSON; unparseable stdin lines are
 * skipped. With neither source available an error is printed and the exit
 * code is 1. Any exception is reported on stderr with exit code 1.
 *
 * The function sets `process.exitCode` and returns instead of calling
 * `process.exit()`: the result can be large, and a forced exit right after
 * `process.stdout.write()` may cut off output that has not been flushed yet
 * when stdout is written asynchronously.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    process.stdout.write("usage: adam-batch.mjs [--input <jsonl-path>] [--min-entries N] [--min-sessions N]\n");
    return;
  }
  try {
    let entries;
    if (args.input) {
      // NOTE(review): readJsonlSafe is defined in ./adam-utils.mjs; presumably
      // it returns an array of parsed lines — confirm against that module.
      entries = readJsonlSafe(args.input);
    } else if (!process.stdin.isTTY) {
      // fd 0 is stdin; read everything synchronously before parsing.
      const buf = readFileSync(0, "utf8");
      entries = [];
      for (const line of buf.split("\n")) {
        if (!line) continue;
        // Best effort by design: a malformed line must not sink the batch.
        try { entries.push(JSON.parse(line)); } catch { /* skip */ }
      }
    } else {
      process.stderr.write("adam-batch: no input (use --input or pipe)\n");
      process.exitCode = 1;
      return;
    }
    const result = buildBatches(entries, { minEntries: args.minEntries, minSessions: args.minSessions });
    process.stdout.write(JSON.stringify(result) + "\n");
  } catch (e) {
    // Non-Error throws have no `.message`; fall back to their string form.
    const reason = e instanceof Error ? e.message : String(e);
    process.stderr.write(`adam-batch error: ${reason}\n`);
    process.exitCode = 1;
  }
}

// Run the CLI only when this file is the script Node was started with, not
// when it is imported as a module. pathToFileURL() builds the same
// percent-encoded file URL form that import.meta.url uses, so the comparison
// also holds for paths containing spaces or non-ASCII characters and for
// Windows drive paths, where a hand-built `file://<path>` string never matches.
// process.argv[1] is absent when Node runs without a script file.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}

export { clusterKey, tokenizePhrase };