#!/usr/bin/env node
// adam-batch.mjs — pre-clusters windowed journal entries into coherent failure
// batches before analyst dispatch. Implements MOSS §3.1: "anchored to an
// automatically curated batch of production-failure evidence."
//
// Each batch groups entries by (signal_type, cluster_key) where cluster_key
// follows the same clustering rules as agents/adam.md §4:
//   correction                → tokenized phrase (cross-cwd)
//   retry_loop                → tool
//   weak_agent                → subagent_type
//   tool_error_loop           → fp
//   dead_end                  → session
//   edit_churn                → file basename
//   build_loop                → session
//   subagent_dispatch_pattern → subagent_type
//   silent_drift              → active_skills[0]
//   error_after_recovery      → (recovered_from, original_fp)
//   correction_free_streak    → active_skills[0]
//   clean_recovery            → (recovered_from, active_skills[0])
//   task_completed            → sorted tool_kinds tuple
//   (any other type)          → session
//
// CLI:
//   adam-batch.mjs [--input PATH] [--min-entries N] [--min-sessions N]
//
// Output: JSON object with `batches` array, `unbatched` count and `total` count.

import { readFileSync, realpathSync } from "node:fs";
import { pathToFileURL } from "node:url";
import { readJsonlSafe } from "./adam-utils.mjs";

const DEFAULT_MIN_ENTRIES = 1;
const DEFAULT_MIN_SESSIONS = 1;

const USAGE = "usage: adam-batch.mjs [--input PATH] [--min-entries N] [--min-sessions N]\n";

// Words dropped from correction phrases before they are used as a cluster key.
const CORRECTION_STOPWORDS = new Set([
  "the", "a", "an", "and", "or", "but", "of", "to", "for", "in", "on", "with",
  "use", "when", "where", "what", "why", "how", "this", "that", "these", "those",
  "is", "are", "was", "were", "be", "been", "being", "do", "does", "did", "doing",
  "has", "have", "had", "your", "you", "i", "it", "as", "at", "by", "from",
  "not", "no",
]);

/**
 * Normalizes a correction phrase into an order-independent token key.
 *
 * The phrase is lower-cased, split on whitespace, stripped of leading and
 * trailing characters that are neither word characters nor apostrophes,
 * filtered against CORRECTION_STOPWORDS, sorted, and joined with "|".
 *
 * @param {unknown} phrase - Raw phrase; anything that is not a non-empty string yields "".
 * @returns {string} The "|"-joined sorted tokens, or "" when nothing remains.
 */
function tokenizePhrase(phrase) {
  if (!phrase || typeof phrase !== "string") return "";
  return phrase
    .toLowerCase()
    .split(/\s+/)
    .map((token) => token.replace(/^[^\w']+|[^\w']+$/g, ""))
    .filter((token) => token && !CORRECTION_STOPWORDS.has(token))
    .sort()
    .join("|");
}

/**
 * Returns the first element of `entry.active_skills`, or "" when the field is
 * not an array or its first element is falsy.
 */
function firstActiveSkill(entry) {
  return Array.isArray(entry.active_skills) ? entry.active_skills[0] || "" : "";
}

/**
 * Computes the cluster key of a journal entry according to its `type`
 * (see the table in the file header).
 *
 * @param {unknown} entry - Journal entry.
 * @returns {*} null when `entry` is not an object; otherwise the key for the
 *   entry's type, with "unknown" (or "" / "?" for the skill- and
 *   recovery-based types) standing in for missing fields.
 */
function clusterKey(entry) {
  if (!entry || typeof entry !== "object") return null;
  switch (entry.type) {
    case "correction":
      return tokenizePhrase(entry.phrase) || "unknown";
    case "retry_loop":
      return entry.tool || "unknown";
    case "weak_agent":
    case "subagent_dispatch_pattern":
      return entry.subagent_type || "unknown";
    case "tool_error_loop":
      return entry.fp || "unknown";
    case "dead_end":
    case "build_loop":
      return entry.session || "unknown";
    case "edit_churn":
      // Only strings have a basename; a malformed (non-string) `file` must not
      // abort the whole batching run with a TypeError.
      return typeof entry.file === "string" && entry.file
        ? entry.file.split("/").pop()
        : "unknown";
    case "silent_drift":
    case "correction_free_streak":
      return firstActiveSkill(entry);
    case "error_after_recovery":
      return `${entry.recovered_from || "?"}:${entry.original_fp || "?"}`;
    case "clean_recovery":
      return `${entry.recovered_from || "?"}:${firstActiveSkill(entry)}`;
    case "task_completed":
      // Sort a copy so the caller's array is left untouched.
      return Array.isArray(entry.tool_kinds)
        ? entry.tool_kinds.slice().sort().join(",")
        : "unknown";
    default:
      return entry.session || "unknown";
  }
}

/**
 * Parses command-line arguments.
 *
 * Unknown arguments, value-taking flags given as the last argument, and
 * thresholds that are NaN or not greater than zero are ignored, leaving the
 * corresponding default in place.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{input: (string|null), minEntries: number, minSessions: number, help: boolean}}
 */
function parseArgs(argv) {
  const args = {
    input: null,
    minEntries: DEFAULT_MIN_ENTRIES,
    minSessions: DEFAULT_MIN_SESSIONS,
    help: false,
  };
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    const hasValue = i + 1 < argv.length;
    if (flag === "--input" && hasValue) {
      args.input = argv[++i];
    } else if (flag === "--min-entries" && hasValue) {
      const n = Number(argv[++i]);
      if (!Number.isNaN(n) && n > 0) args.minEntries = n;
    } else if (flag === "--min-sessions" && hasValue) {
      const n = Number(argv[++i]);
      if (!Number.isNaN(n) && n > 0) args.minSessions = n;
    } else if (flag === "--help" || flag === "-h") {
      args.help = true;
    }
  }
  return args;
}

/**
 * Groups journal entries into batches keyed by (type, cluster key).
 *
 * Entries that are not objects or have no truthy `type` are skipped: they are
 * counted in `total` but in neither `batches` nor `unbatched`. Groups with
 * fewer than `minEntries` entries or fewer than `minSessions` distinct
 * `session` values are not emitted; their entries are added to `unbatched`.
 * Batch ids ("b1", "b2", ...) are assigned in first-seen order of the groups
 * that pass the thresholds, and the returned list is then sorted by
 * `entry_count`, largest first.
 *
 * @param {Iterable<object>|null|undefined} entries - Journal entries.
 * @param {{minEntries?: number, minSessions?: number}} [opts] - Thresholds;
 *   falsy values fall back to the module defaults.
 * @returns {{batches: object[], unbatched: number, total: number}}
 */
export function buildBatches(entries, opts = {}) {
  const minEntries = opts.minEntries || DEFAULT_MIN_ENTRIES;
  const minSessions = opts.minSessions || DEFAULT_MIN_SESSIONS;

  const groups = new Map();
  for (const entry of entries || []) {
    if (!entry || typeof entry !== "object" || !entry.type) continue;
    // Compute the cluster key once; it is used for both the map key and the
    // emitted `cluster_key` field.
    const cluster = clusterKey(entry);
    const groupKey = `${entry.type}::${cluster}`;
    let group = groups.get(groupKey);
    if (!group) {
      group = {
        signal_type: entry.type,
        cluster_key: cluster,
        entries: [],
        sessions: new Set(),
        cwds: new Set(),
      };
      groups.set(groupKey, group);
    }
    group.entries.push(entry);
    if (entry.session) group.sessions.add(entry.session);
    if (entry.cwd) group.cwds.add(entry.cwd);
  }

  const batches = [];
  let unbatched = 0;
  let nextId = 1;
  for (const group of groups.values()) {
    if (group.entries.length < minEntries || group.sessions.size < minSessions) {
      unbatched += group.entries.length;
      continue;
    }
    batches.push({
      batch_id: `b${nextId++}`,
      signal_type: group.signal_type,
      cluster_key: group.cluster_key,
      entry_count: group.entries.length,
      session_count: group.sessions.size,
      cwd_count: group.cwds.size,
      has_context_window: group.entries.some(
        (e) => Array.isArray(e.context_window) && e.context_window.length > 0,
      ),
      entries: group.entries,
    });
  }

  batches.sort((a, b) => b.entry_count - a.entry_count);
  return { batches, unbatched, total: (entries || []).length };
}

/**
 * Parses newline-separated JSON text, skipping empty lines and lines that are
 * not valid JSON (best effort by design).
 */
function parseJsonLines(text) {
  const parsed = [];
  for (const line of text.split("\n")) {
    if (!line) continue;
    try {
      parsed.push(JSON.parse(line));
    } catch {
      /* skip */
    }
  }
  return parsed;
}

/**
 * CLI entry point: reads entries from --input or piped stdin, writes the
 * batching result as one line of JSON to stdout.
 *
 * The exit status is reported through `process.exitCode` instead of calling
 * `process.exit()`, because `process.exit()` can terminate the process before
 * pending asynchronous stdout/stderr writes have been flushed.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    process.stdout.write(USAGE);
    return;
  }
  try {
    let entries;
    if (args.input) {
      entries = readJsonlSafe(args.input);
    } else if (!process.stdin.isTTY) {
      // File descriptor 0 is stdin.
      entries = parseJsonLines(readFileSync(0, "utf8"));
    } else {
      process.stderr.write("adam-batch: no input (use --input or pipe)\n");
      process.exitCode = 1;
      return;
    }
    const result = buildBatches(entries, {
      minEntries: args.minEntries,
      minSessions: args.minSessions,
    });
    process.stdout.write(JSON.stringify(result) + "\n");
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    process.stderr.write(`adam-batch error: ${message}\n`);
    process.exitCode = 1;
  }
}

/**
 * Reports whether this module is the script Node was asked to run.
 *
 * `import.meta.url` is a percent-encoded file URL, so the script path has to
 * be converted with `pathToFileURL` rather than prefixed with "file://".
 * The resolved real path is also tried so that launching through a symlink
 * still matches.
 */
function isMainModule() {
  const scriptPath = process.argv[1];
  if (!scriptPath) return false;
  if (import.meta.url === pathToFileURL(scriptPath).href) return true;
  try {
    return import.meta.url === pathToFileURL(realpathSync(scriptPath)).href;
  } catch {
    return false;
  }
}

if (isMainModule()) {
  main();
}

export { clusterKey, tokenizePhrase };