feat: apply MOSS-grounded self-evolution improvements to ADAM

Implements 7 improvements grounded in the MOSS paper (arXiv 2605.22794): 1. Transcript capture (§3.4): a context_ring buffer in adam-observe.mjs keeps the last 8 events (user prompts and tool calls) and attaches a snapshot of them to struggle-signal journal entries as context_window. 2. Evidence batching (§3.1): new adam-batch.mjs pre-clusters windowed journal entries into coherent failure batches by (signal_type, cluster_key). 3. Multi-stage analysis (§3.3): SKILL.md dispatches the adam agent in two stages (diagnose+plan → implement) with an inter-stage validation gate. 4. Pre-apply verification (§3.4): 4-check deterministic gate before auto-apply (source entries exist, diagnosis grounded, type-evidence match, no conflicting recent proposals). 5. Auto-rollback (§3.5): new adam-rollback.mjs reverts regressed proposals detected by A/B measurement and creates regression nudges. 6. Harness self-modification (§1 Table 1): new harness_edit proposal type targeting adam's own scripts with stricter gates (confidence≥5, never auto-apply, test-suite-gated). 7. Keypoint matrix evaluation (§4.2): 5 capability dimensions (tool_selection, scope_discipline, error_recovery, first_attempt, build_reliability) scored per batch for structured evaluation. Test suite: 94 → 114 tests (20 new), all passing.
2026-06-29 02:52:39 +00:00 · 2026-05-24 11:15:32 +01:00
parent a48c705c0a
commit 440fb52eb1
7 changed files with 1038 additions and 20 deletions
@@ -107,10 +107,15 @@ const CLEAN_RECOVERY_WINDOW = 3;
 const SILENT_DRIFT_THRESHOLD = 5;
 const ERROR_AFTER_RECOVERY_WINDOW = 5;
 const RECENT_RECOVERIES_MAX = 3;
-const STRUGGLE_TYPES = new Set(["tool_error_loop", "dead_end", "retry_loop"]);
+const STRUGGLE_TYPES = new Set([ // journal entry types that get a context_window snapshot attached when emitted
+  "tool_error_loop", "dead_end", "retry_loop", "weak_agent",
+  "edit_churn", "build_loop", "silent_drift", "error_after_recovery",
+]);
 const ACTIVE_SKILLS_LOOKBACK = 10;
 const TASK_TOOL_MIN = 5;
 const TASK_DIVERSITY_MIN = 3;
+const CONTEXT_RING_SIZE = 8; // capacity of state.context_ring; pushContext drops the oldest entry beyond this
+const CONTEXT_EXCERPT_LEN = 200; // length limit passed to excerpt() for context-ring prompt, input and response text
 const STATE_MAX_BYTES = 1_000_000;

 function safeRead(path, fallback) {
@@ -226,6 +231,20 @@ function activeNames(state, kind) {
  return [...seen];
 }

+function excerpt(text, len) { // Shortens text to len characters plus an ellipsis when it is longer; returns null for non-string or empty input.
+  if (!text || typeof text !== "string") return null; // the falsy check means an empty string also yields null
+  return text.length > len ? text.slice(0, len) + "…" : text; // length and slice count UTF-16 code units; a truncated result is len + 1 units long
+}
+
+function pushContext(state, entry) { // Appends entry to state.context_ring and evicts the oldest entry when the ring grows past CONTEXT_RING_SIZE.
+  state.context_ring.push(entry); // mutates state in place; throws if context_ring is not an array
+  if (state.context_ring.length > CONTEXT_RING_SIZE) state.context_ring.shift(); // removes a single entry per call, from the front (oldest)
+}
+
+function snapshotContext(state) { // Returns a copy of state.context_ring, or undefined when the ring is empty.
+  return state.context_ring.length ? state.context_ring.slice() : undefined; // slice() copies the array only; the entry objects are shared with the ring
+}
+
 function errorFingerprint(toolResponse) {
  if (!toolResponse) return null;
  let text = "";
@@ -290,6 +309,7 @@ function resetSessionLocal(state) {
  state.recentRecoveries = [];
  state.session_post_count = 0;
  state.tool_window = [];
+  state.context_ring = [];
  state.task_tool_kinds = {};
  state.task_tool_count = 0;
  state.task_corrections = 0;
@@ -316,6 +336,7 @@ function ensureStateDefaults(state) {
  if (typeof state.silentDriftEmitted !== "boolean") state.silentDriftEmitted = false;
  if (!Array.isArray(state.recentRecoveries)) state.recentRecoveries = [];
  if (typeof state.session_post_count !== "number") state.session_post_count = 0;
+  if (!Array.isArray(state.context_ring)) state.context_ring = [];
 }

 function main() {
@@ -340,6 +361,7 @@ function main() {

  if (event === "UserPromptSubmit") {
    const prompt = (input.prompt || "").slice(0, 200);
+    pushContext(state, { event: "user", prompt: excerpt(prompt, CONTEXT_EXCERPT_LEN), ts });
    if (isCorrection(prompt)) {
      const last = state.tool_window[state.tool_window.length - 1] || {};
      appendJournal({
@@ -406,9 +428,27 @@ function main() {
    const argsHash = djb2(JSON.stringify(input.tool_input || {}));
    const file = (input.tool_input && (input.tool_input.file_path || input.tool_input.path)) || null;

+    const toolResponse = input.tool_response;
+    const respExcerpt = (() => {
+      if (!toolResponse) return null;
+      const text = typeof toolResponse === "string" ? toolResponse
+        : typeof toolResponse.content === "string" ? toolResponse.content
+        : null;
+      return excerpt(text, CONTEXT_EXCERPT_LEN);
+    })();
+    pushContext(state, {
+      event: "tool", tool, ts,
+      input_excerpt: excerpt(JSON.stringify(input.tool_input || {}), CONTEXT_EXCERPT_LEN),
+      response_excerpt: respExcerpt,
+      is_error: !!(toolResponse && toolResponse.is_error),
+    });
+
    let struggleEmittedThisTurn = null;
    const emit = (entry) => {
-      if (STRUGGLE_TYPES.has(entry.type)) struggleEmittedThisTurn = entry.type;
+      if (STRUGGLE_TYPES.has(entry.type)) {
+        entry.context_window = snapshotContext(state);
+        struggleEmittedThisTurn = entry.type;
+      }
      appendJournal(entry);
    };