mirror of
https://github.com/lukaszraczylo/claude-adam.git
synced 2026-06-29 02:52:39 +00:00
feat: apply MOSS-grounded self-evolution improvements to ADAM
Implements 7 improvements grounded in MOSS paper (arXiv 2605.22794): 1. Transcript capture (§3.4): context_ring buffer in adam-observe.mjs captures last 8 events around struggle signals as context_window. 2. Evidence batching (§3.1): new adam-batch.mjs pre-clusters windowed journal entries into coherent failure batches by (signal_type, cluster_key). 3. Multi-stage analysis (§3.3): SKILL.md dispatches adam agent in two stages (diagnose+plan → implement) with inter-stage validation gate. 4. Pre-apply verification (§3.4): 4-check deterministic gate before auto-apply (source entries exist, diagnosis grounded, type-evidence match, no conflicting recent proposals). 5. Auto-rollback (§3.5): new adam-rollback.mjs reverts regressed proposals detected by A/B measurement, creates regression nudges. 6. Harness self-modification (§1 Table 1): new harness_edit proposal type targeting adam's own scripts with stricter gates (confidence≥5, never auto-apply, test-suite-gated). 7. Keypoint matrix evaluation (§4.2): 5 capability dimensions (tool_selection, scope_discipline, error_recovery, first_attempt, build_reliability) scored per batch for structured evaluation. Test suite: 94 → 114 tests (20 new), all passing.
This commit is contained in:
+42
-2
@@ -107,10 +107,15 @@ const CLEAN_RECOVERY_WINDOW = 3;
|
||||
const SILENT_DRIFT_THRESHOLD = 5;
|
||||
const ERROR_AFTER_RECOVERY_WINDOW = 5;
|
||||
const RECENT_RECOVERIES_MAX = 3;
|
||||
const STRUGGLE_TYPES = new Set(["tool_error_loop", "dead_end", "retry_loop"]);
|
||||
const STRUGGLE_TYPES = new Set([
|
||||
"tool_error_loop", "dead_end", "retry_loop", "weak_agent",
|
||||
"edit_churn", "build_loop", "silent_drift", "error_after_recovery",
|
||||
]);
|
||||
const ACTIVE_SKILLS_LOOKBACK = 10;
|
||||
const TASK_TOOL_MIN = 5;
|
||||
const TASK_DIVERSITY_MIN = 3;
|
||||
const CONTEXT_RING_SIZE = 8;
|
||||
const CONTEXT_EXCERPT_LEN = 200;
|
||||
const STATE_MAX_BYTES = 1_000_000;
|
||||
|
||||
function safeRead(path, fallback) {
|
||||
@@ -226,6 +231,20 @@ function activeNames(state, kind) {
|
||||
return [...seen];
|
||||
}
|
||||
|
||||
/**
 * Shorten a string for storage in a context entry.
 *
 * @param {*} text - Candidate text; anything that is not a non-empty string yields null.
 * @param {number} len - Maximum number of UTF-16 code units to keep.
 * @returns {string|null} `text` unchanged when it fits within `len`, otherwise
 *   its leading part followed by "…"; null for empty or non-string input.
 */
function excerpt(text, len) {
  if (!text || typeof text !== "string") return null;
  if (text.length <= len) return text;

  let end = len;
  // A plain slice can cut between the two halves of a surrogate pair (e.g. an
  // emoji), leaving a lone high surrogate in the output. Back off by one code
  // unit in that case so the excerpt stays well-formed.
  const lastKept = text.charCodeAt(end - 1);
  const firstDropped = text.charCodeAt(end);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsPair) end -= 1;

  return `${text.slice(0, end)}…`;
}
|
||||
|
||||
/**
 * Append an event record to the rolling context buffer on `state`, keeping
 * only the newest CONTEXT_RING_SIZE entries.
 *
 * The buffer is trimmed in place, so existing references to
 * `state.context_ring` remain valid.
 *
 * @param {object} state - Observer state; `state.context_ring` is created when
 *   it is missing or not an array.
 * @param {object} entry - Event record to remember.
 */
function pushContext(state, entry) {
  if (!Array.isArray(state.context_ring)) state.context_ring = [];
  const ring = state.context_ring;
  ring.push(entry);

  // Remove all surplus entries rather than a single one: ensureStateDefaults
  // only checks that the ring is an array, not its length, so a ring that is
  // already over the limit would otherwise never shrink back to size.
  const surplus = ring.length - CONTEXT_RING_SIZE;
  if (surplus > 0) ring.splice(0, surplus);
}
|
||||
|
||||
/**
 * Take a point-in-time copy of the rolling context buffer.
 *
 * @param {object} state - Observer state holding `context_ring`.
 * @returns {Array|undefined} A shallow copy of the buffered entries, or
 *   undefined when the buffer is empty, missing, or not an array.
 */
function snapshotContext(state) {
  const ring = state.context_ring;
  // The snapshot is optional extra data on a journal entry, so a missing or
  // malformed ring yields "no context" instead of throwing.
  if (!Array.isArray(ring) || ring.length === 0) return undefined;
  // Copy so that later pushes/trims of the live ring do not alter the snapshot.
  return [...ring];
}
|
||||
|
||||
function errorFingerprint(toolResponse) {
|
||||
if (!toolResponse) return null;
|
||||
let text = "";
|
||||
@@ -290,6 +309,7 @@ function resetSessionLocal(state) {
|
||||
state.recentRecoveries = [];
|
||||
state.session_post_count = 0;
|
||||
state.tool_window = [];
|
||||
state.context_ring = [];
|
||||
state.task_tool_kinds = {};
|
||||
state.task_tool_count = 0;
|
||||
state.task_corrections = 0;
|
||||
@@ -316,6 +336,7 @@ function ensureStateDefaults(state) {
|
||||
if (typeof state.silentDriftEmitted !== "boolean") state.silentDriftEmitted = false;
|
||||
if (!Array.isArray(state.recentRecoveries)) state.recentRecoveries = [];
|
||||
if (typeof state.session_post_count !== "number") state.session_post_count = 0;
|
||||
if (!Array.isArray(state.context_ring)) state.context_ring = [];
|
||||
}
|
||||
|
||||
function main() {
|
||||
@@ -340,6 +361,7 @@ function main() {
|
||||
|
||||
if (event === "UserPromptSubmit") {
|
||||
const prompt = (input.prompt || "").slice(0, 200);
|
||||
pushContext(state, { event: "user", prompt: excerpt(prompt, CONTEXT_EXCERPT_LEN), ts });
|
||||
if (isCorrection(prompt)) {
|
||||
const last = state.tool_window[state.tool_window.length - 1] || {};
|
||||
appendJournal({
|
||||
@@ -406,9 +428,27 @@ function main() {
|
||||
const argsHash = djb2(JSON.stringify(input.tool_input || {}));
|
||||
const file = (input.tool_input && (input.tool_input.file_path || input.tool_input.path)) || null;
|
||||
|
||||
const toolResponse = input.tool_response;
|
||||
const respExcerpt = (() => {
|
||||
if (!toolResponse) return null;
|
||||
const text = typeof toolResponse === "string" ? toolResponse
|
||||
: typeof toolResponse.content === "string" ? toolResponse.content
|
||||
: null;
|
||||
return excerpt(text, CONTEXT_EXCERPT_LEN);
|
||||
})();
|
||||
pushContext(state, {
|
||||
event: "tool", tool, ts,
|
||||
input_excerpt: excerpt(JSON.stringify(input.tool_input || {}), CONTEXT_EXCERPT_LEN),
|
||||
response_excerpt: respExcerpt,
|
||||
is_error: !!(toolResponse && toolResponse.is_error),
|
||||
});
|
||||
|
||||
let struggleEmittedThisTurn = null;
|
||||
const emit = (entry) => {
|
||||
if (STRUGGLE_TYPES.has(entry.type)) struggleEmittedThisTurn = entry.type;
|
||||
if (STRUGGLE_TYPES.has(entry.type)) {
|
||||
entry.context_window = snapshotContext(state);
|
||||
struggleEmittedThisTurn = entry.type;
|
||||
}
|
||||
appendJournal(entry);
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user