diff --git a/README.md b/README.md index 913bf7b..2169cdc 100644 --- a/README.md +++ b/README.md @@ -36,15 +36,16 @@ LLM coding sessions reveal repeated friction the moment you stop and look. ADAM ├── skills/adam-self-improvement/SKILL.md # /reflect protocol ├── commands/reflect.md # /reflect slash command └── adam/ - ├── journal.jsonl # append-only signal log - ├── journal/ # rotated daily logs (>5 MB threshold) - ├── state.json # cursor + per-session counters - ├── usage.json # skill/agent invocation tallies + ├── journal.jsonl # append-only signal log (active observations) + ├── journal/ # rotated daily logs + actioned-.jsonl per applied/rejected proposal + ├── state.json # per-session counters (cursor field is vestigial as of v0.2.0) + ├── usage.json # skill/agent invocation tallies + payload visibility counters ├── proposals/ # queued, awaiting review ├── applied/ # approved + auto-applied archive ├── rejected/ # rejected (with reason) ├── trash/ # soft-deleted artifacts (recoverable) - └── tests/run-tests.sh # 18 verification tests + ├── scripts/ # adam-archive.mjs (called by skill on apply/reject) + └── tests/run-tests.sh # 21 verification tests ``` ## Install @@ -71,7 +72,7 @@ After install: ``` Sum: +2 Signal repeated ≥3× across ≥2 sessions -+2 Struggle signal repeated ≥3× within a single session (does not stack with above) ++2 Struggle signal appearing ≥1× within a single session (does not stack) +2 Transcript contains positive endorsement near related action +1 Multi-axis cluster (≥2 distinct struggle types in same session) -1 Type-bias penalty (≥3 rejections, applied:rejected <1:2) @@ -88,6 +89,14 @@ auto_apply_eligible requires ALL: cross_session_evidence == true (single-session-only proposals always queue) ``` +## Lifecycle: how proposals become permanent + +Every proposal records the journal entry timestamps that fed its cluster (`source_entries` in frontmatter). 
When you apply or reject a proposal, the skill calls `adam/scripts/adam-archive.mjs`, which moves matching entries from `journal.jsonl` to `journal/actioned-<id>.jsonl`. Effects: + +- The `journal.jsonl` stays bounded by **active** observations only. +- The next `/reflect` reads applied/ + rejected/ frontmatter, builds an excluded-timestamps set, and skips any leftover journal entries that were already actioned. +- Rule changes (e.g. lowering a threshold) immediately re-evaluate the remaining active observations — no manual cursor rewind needed. + ## What it will not do - No background LLM spend. The analyst runs only when you invoke `/reflect`. diff --git a/adam/scripts/adam-archive.mjs b/adam/scripts/adam-archive.mjs new file mode 100755 index 0000000..2b93aa5 --- /dev/null +++ b/adam/scripts/adam-archive.mjs @@ -0,0 +1,117 @@ +#!/usr/bin/env node +// Usage: adam-archive.mjs <proposal-path> +// Reads `source_entries` from proposal frontmatter, moves matching journal +// entries from journal.jsonl to journal/actioned-<id>.jsonl. Used by the +// adam-self-improvement skill after each apply/reject so subsequent /reflect +// runs do not re-cluster already-actioned signals.
import {
  readFileSync,
  writeFileSync,
  appendFileSync,
  mkdirSync,
  existsSync,
  renameSync,
} from "node:fs";
import { join } from "node:path";
import { homedir } from "node:os";

const ROOT = join(homedir(), ".claude", "adam");
const JOURNAL = join(ROOT, "journal.jsonl");
const JOURNAL_DIR = join(ROOT, "journal");

// Matches the leading "- " of a YAML block-sequence item.
const LIST_ITEM = /^\s*-\s+/;

/** Removes one leading and/or one trailing quote character from a list item. */
function stripEdgeQuotes(text) {
  return text.replace(/^['"]|['"]$/g, "");
}

/** Removes a matching pair of surrounding quotes from a scalar value, if any. */
function unquoteScalar(text) {
  const pair = text.match(/^(['"])(.*)\1$/);
  return pair ? pair[2] : text;
}

/**
 * Reduces a frontmatter id to characters that are safe inside a file name.
 * Anything outside [A-Za-z0-9._-] (notably path separators) becomes "_", so
 * the archive file can never be created outside the journal directory.
 */
function safeId(id) {
  const raw = typeof id === "string" ? id.trim() : "";
  return raw.replace(/[^A-Za-z0-9._-]/g, "_") || "unknown";
}

/**
 * Minimal parser for the `---` delimited frontmatter of a proposal file.
 *
 * Every `key: value` line becomes a string property (surrounding quotes
 * removed). `source_entries` is always returned as an array and may be written
 * inline (`[a, "b"]`) or as a block list (`- "a"` per line; blank lines between
 * items are tolerated). LF and CRLF line endings and a leading BOM are accepted.
 *
 * @param {string} content - full text of the proposal file
 * @returns {Object} parsed fields; `{}` when no frontmatter block is present
 */
function parseFrontmatter(content) {
  const m = content.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/);
  if (!m) return {};

  const fm = {};
  const lines = m[1].split(/\r?\n/);
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    i++;
    const idx = line.indexOf(":");
    if (idx === -1) continue;

    const key = line.slice(0, idx).trim();
    const value = line.slice(idx + 1).trim();

    if (key !== "source_entries") {
      fm[key] = unquoteScalar(value);
      continue;
    }

    // Inline form: source_entries: ["a", "b"]
    if (value.startsWith("[") && value.endsWith("]")) {
      fm[key] = value
        .slice(1, -1)
        .split(",")
        .map((s) => stripEdgeQuotes(s.trim()))
        .filter(Boolean);
      continue;
    }

    // Block form: consume the following "- item" lines. Timestamps contain
    // ":" themselves, so they must be consumed here rather than by the
    // generic key/value branch above.
    const entries = [];
    while (i < lines.length && (LIST_ITEM.test(lines[i]) || lines[i].trim() === "")) {
      const item = stripEdgeQuotes(lines[i].replace(LIST_ITEM, "").trim());
      if (item) entries.push(item);
      i++;
    }
    fm[key] = entries;
  }
  return fm;
}

/**
 * Moves the journal entries named by a proposal's `source_entries` out of the
 * active journal and into `actioned-<id>.jsonl` inside the journal directory.
 *
 * @param {string} [proposalPath=process.argv[2]] - proposal file to read
 * @param {{journal?: string, journalDir?: string}} [paths] - overrides for the
 *   journal file and archive directory; default to the module-level constants
 * @returns {number} exit status: 0 done or nothing to do, 1 proposal
 *   unreadable, 2 missing argument
 */
function main(
  proposalPath = process.argv[2],
  { journal = JOURNAL, journalDir = JOURNAL_DIR } = {},
) {
  if (!proposalPath) {
    console.error("usage: adam-archive.mjs <proposal-path>");
    return 2;
  }

  let proposal;
  try {
    proposal = readFileSync(proposalPath, "utf8");
  } catch (e) {
    console.error(`cannot read ${proposalPath}: ${e.message}`);
    return 1;
  }

  const fm = parseFrontmatter(proposal);
  const id = safeId(fm.id);
  const sourceEntries = Array.isArray(fm.source_entries) ? fm.source_entries : [];

  if (sourceEntries.length === 0) {
    console.log(`${id}: no source_entries in frontmatter — nothing to archive`);
    return 0;
  }

  if (!existsSync(journal)) {
    console.log(`${id}: journal does not exist at ${journal}`);
    return 0;
  }

  const lines = readFileSync(journal, "utf8").split(/\r?\n/).filter(Boolean);
  const tsSet = new Set(sourceEntries);
  const matched = [];
  const remaining = [];

  for (const line of lines) {
    let entry = null;
    try {
      entry = JSON.parse(line);
    } catch {
      // Unparseable lines are not ours to drop: keep them in the journal.
    }
    if (entry !== null && typeof entry === "object" && entry.ts && tsSet.has(entry.ts)) {
      matched.push(line);
    } else {
      remaining.push(line);
    }
  }

  if (matched.length === 0) {
    console.log(`${id}: no matching entries in journal (already archived?)`);
    return 0;
  }

  // Archive first, then shrink the journal: a failure in between can at worst
  // duplicate entries in the archive, never lose them.
  mkdirSync(journalDir, { recursive: true });
  const archivePath = join(journalDir, `actioned-${id}.jsonl`);
  appendFileSync(archivePath, matched.join("\n") + "\n");

  // Write the new journal beside the old one and rename it into place so an
  // interrupted write cannot leave a truncated journal behind.
  // NOTE(review): an entry appended to the journal between the read above and
  // this rename would still be lost — confirm whether the observer hook can
  // run concurrently with this script.
  const tmpPath = `${journal}.tmp-${process.pid}`;
  writeFileSync(tmpPath, remaining.length ? remaining.join("\n") + "\n" : "");
  renameSync(tmpPath, journal);

  console.log(`${id}: archived ${matched.length}/${lines.length} entries → ${archivePath}`);
  return 0;
}

// Set exitCode instead of calling process.exit() so pending console output is
// flushed before the process ends.
try {
  process.exitCode = main();
} catch (e) {
  console.error(`error: ${e.message}`);
  process.exitCode = 1;
}
"$archived" = "2" ]; then + echo " PASS: archive moved 2 matching, kept 1 unmatched"; PASS=$((PASS+1)) +else + echo " FAIL: expected 1 remaining + 2 archived, got $remaining + $archived"; FAIL=$((FAIL+1)) +fi +rm -rf /tmp/adam-test-17 "$ROOT/journal/actioned-test-archive-001.jsonl" + +# --- Test 18: adam-archive no-op when source_entries missing --- +echo "Test 18: adam-archive no-op when source_entries missing" +reset_state +echo '{"ts":"2026-01-01T00:00:00Z","type":"correction"}' > "$ROOT/journal.jsonl" +mkdir -p /tmp/adam-test-18 +cat > /tmp/adam-test-18/proposal.md </dev/null 2>&1 || true +if [ -f "$ROOT/journal/actioned-test-noop-002.jsonl" ]; then + echo " FAIL: archive file created when no source_entries"; FAIL=$((FAIL+1)) +else + echo " PASS: no archive file created"; PASS=$((PASS+1)) +fi +remaining=$(wc -l < "$ROOT/journal.jsonl" | tr -d ' ') +if [ "$remaining" = "1" ]; then + echo " PASS: journal unchanged"; PASS=$((PASS+1)) +else + echo " FAIL: journal modified ($remaining lines, expected 1)"; FAIL=$((FAIL+1)) +fi +rm -rf /tmp/adam-test-18 + echo echo "Results: $PASS passed, $FAIL failed" [ "$FAIL" = "0" ] diff --git a/agents/adam.md b/agents/adam.md index bcc0a46..3f4b027 100644 --- a/agents/adam.md +++ b/agents/adam.md @@ -46,17 +46,18 @@ The hook emits these `type` values into the journal: ## Process -1. Read `state.json` → `cursor` (number of journal lines already processed). -2. Read `journal.jsonl`. New observations = lines after `cursor`. -3. If 0 new lines, emit punch list `{"new":0}` and stop. -4. **Build feedback context** (run once per `/reflect`): - a. List `rejected_dir/` filenames. Parse each `# Why` and `# Reason` sections. Build a set of rejected ideas (token-tokenized for similarity matching). - b. List `applied_dir/` filenames. Parse frontmatter `type` and `target`. Tally `applied_by_type[type]` and `applied_by_target[basename(target)]`. - c. From these, compute **type biases**: +1. 
**Build feedback context** (run once per `/reflect`): + a. List `rejected_dir/` filenames. Parse each frontmatter `source_entries` (if present), `# Why` and `# Reason` sections. + b. List `applied_dir/` filenames. Parse each frontmatter `type`, `target`, `source_entries`. Tally `applied_by_type[type]`. + c. Compute the **excluded-timestamps set**: union of all `source_entries` arrays across `applied_dir/` + `rejected_dir/`. Journal entries with these `ts` values have already been actioned and MUST NOT be re-clustered. + d. Build the **rejected-ideas set** (token-tokenized `# Why` content) for fuzzy fallback matching when a new cluster topic resembles a rejected one but doesn't share `source_entries` (handles legacy proposals without `source_entries`). + e. Compute **type biases**: - Types with applied:rejected ratio >2:1 (over ≥3 total): neutral, no bonus. - Types with applied:rejected ratio <1:2 (over ≥3 rejections): **-1 confidence penalty**, recorded in proposal `# Why` as "type-bias-penalty: ". -5. Cluster new observations: - - `correction`: tokenize phrase (drop stopwords, keep content tokens). Phrases sharing ≥2 content tokens collapse into one cluster — regardless of `prev_tool` or `cwd`. Record distinct cwds in cluster (used for CLAUDE.md eligibility). +2. Read `journal.jsonl`. Filter out entries whose `ts` is in the excluded-timestamps set. The result = **active observations**. +3. If 0 active observations, emit punch list `{"new":0}` and stop. +4. Cluster active observations: + - `correction`: tokenize phrase (drop stopwords, keep content tokens). Phrases sharing ≥2 content tokens collapse into one cluster — regardless of `prev_tool` or `cwd`. Record distinct cwds (used for CLAUDE.md eligibility). - `retry_loop`: cluster by `tool`. - `weak_agent`: cluster by `subagent_type`. - `tool_error_loop`: cluster by `fp`. @@ -64,26 +65,26 @@ The hook emits these `type` values into the journal: - `edit_churn`: cluster by file basename pattern (e.g. `*.test.ts`). 
- `build_loop`: cluster by `session`. - `subagent_dispatch_pattern`: cluster by `subagent_type`. -6. **Multi-axis correlation**: for each session that produced ≥2 distinct struggle types (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`), tag clusters from that session as `multi_axis: true`. This grants +1 confidence at scoring. -7. For each cluster qualifying under the rubric — ≥3 occurrences across ≥2 sessions, OR (for struggle types `tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`) ≥1 entry within a single session, OR (for `correction`) ≥3 occurrences across ≥2 cwds: - a. If cluster topic matches a rejected idea (≥2 token overlap with rejection's `# Why`), skip with reason `"rejected-similar"`. +5. **Multi-axis correlation**: for each session that produced ≥2 distinct struggle types (`tool_error_loop`, `dead_end`, `weak_agent`, `retry_loop`, `edit_churn`, `build_loop`), tag clusters from that session as `multi_axis: true`. This grants +1 confidence at scoring. +6. For each cluster qualifying under the rubric — ≥3 occurrences across ≥2 sessions, OR (for struggle types) ≥1 entry within a single session, OR (for `correction`) ≥3 occurrences across ≥2 cwds: + a. If cluster topic matches a rejected idea via the rejected-ideas fuzzy set (≥2 token overlap with rejection's `# Why`), skip with reason `"rejected-similar"`. b. Pull ~20 messages of transcript context from `transcripts_root` to enrich. Never read full transcripts. - c. **Solution synthesis** (when type would be `skill_new` AND cluster qualifies for proposal): pull additional ~30 messages of transcript window around the friction events (~50 messages total). Extract: + c. **Solution synthesis** (when candidate type is `skill_new` AND cluster qualifies): pull additional ~30 messages around friction events (~50 messages total). Extract: - Concrete trigger phrases the user says verbatim. - Tools / files involved. 
- Successful resolution patterns later in transcript (positive endorsement). - Counterexamples (false-positive triggers to exclude). - d. **Skill overlap check** (skill_new candidates only): see "Skill overlap rule" below. If overlap qualifies, switch type to `skill_edit` targeting the matched SKILL.md. + d. **Skill overlap check** (`skill_new` only): see "Skill overlap rule". If overlap qualifies, switch type to `skill_edit` targeting matched SKILL.md. e. **Draft full content**: - - `skill_new`: draft the complete SKILL.md per "Skill drafting protocol" below. `# Proposed change` contains the full file body. - - `skill_edit`: draft an append-only unified diff per "Skill overlap rule". - - `memory`: draft full memory file content (frontmatter + body). - - Other types: per existing rules (unified diff or full content). + - `skill_new`: complete SKILL.md per "Skill drafting protocol". + - `skill_edit`: append-only unified diff per "Skill overlap rule". + - `memory`: complete memory file per "Memory drafting protocol". + - Other: per existing rules (unified diff or full content). f. Score against rubric → `confidence`, `blast_radius`, `cross_session_evidence`, `multi_axis`, `auto_apply_eligible`. - g. Apply feedback bias (step 4c) and multi-axis bonus. - h. Emit proposal file to `proposals_dir/`. -8. Update `cursor` in `state.json` to new line count. -9. Emit punch list to stdout (last message): `{"new":N, "high_confidence":[...], "queued":[...], "skipped":[...]}`. + g. Apply feedback bias (step 1e) and multi-axis bonus. + h. **Record `source_entries`**: list every journal entry timestamp that fed this cluster. Goes in proposal frontmatter as a YAML block-form array (one `- ""` per line). The skill consumes this on apply/reject to archive matching entries out of `journal.jsonl` and into `journal/actioned-.jsonl`. + i. Emit proposal file to `proposals_dir/`. +7. 
Emit punch list to stdout (last message): `{"new":N, "high_confidence":[...], "queued":[...], "skipped":[...]}`. The `cursor` field in `state.json` is vestigial as of v0.2.0 — do not read or write it. ## Skill overlap rule @@ -143,6 +144,34 @@ When the main thread applies a `skill_new` proposal: 2. Writes the `# Proposed change` body to `/SKILL.md`. 3. Tells the user: "skill `` written. Activates immediately on next user turn (CC v2.1.0+ auto-hot-reload)." +## Memory drafting protocol (for `memory` proposals) + +Every `memory` proposal's `# Proposed change` section MUST contain the COMPLETE memory file body — frontmatter + content — that will be written to the target path under `~/.claude/projects//memory/.md`. + +Required structure: + +```markdown +--- +name: +description: +type: user | feedback | project | reference +originSessionId: +--- + + +``` + +Constraints: +- Frontmatter fields `name`, `description`, `type` are **required**. Skill enforces this at apply time. +- `originSessionId` is required — must be a `session` value from one of the cluster's journal entries. +- ≤50 LOC of body content. Surgical. +- Slug (used in `target` path filename) must not collide with any existing memory file. +- For `type=feedback` and `type=project`, body MUST contain `**Why:**` and `**How to apply:**` lines (CLAUDE.md memory schema). + ## Confidence rubric (deterministic — do NOT vibe) Sum: @@ -210,6 +239,10 @@ cross_session_evidence: true | false multi_axis: true | false auto_apply_eligible: true | false status: queued +source_entries: + - "" + - "" + - "..." 
--- # Why diff --git a/install.sh b/install.sh index 61a8de4..e817d95 100755 --- a/install.sh +++ b/install.sh @@ -24,6 +24,7 @@ mkdir -p \ "$DEST/adam/rejected" \ "$DEST/adam/trash" \ "$DEST/adam/journal" \ + "$DEST/adam/scripts" \ "$DEST/adam/tests/fixtures" cp "$SRC/hooks/adam-observe.mjs" "$DEST/hooks/" @@ -31,6 +32,7 @@ cp "$SRC/hooks/adam-nudge.mjs" "$DEST/hoo cp "$SRC/agents/adam.md" "$DEST/agents/" cp "$SRC/skills/adam-self-improvement/SKILL.md" "$DEST/skills/adam-self-improvement/" cp "$SRC/commands/reflect.md" "$DEST/commands/" +cp "$SRC/adam/scripts/adam-archive.mjs" "$DEST/adam/scripts/" cp "$SRC/adam/tests/run-tests.sh" "$DEST/adam/tests/" cp "$SRC/adam/tests/fixtures/seed-corrections.jsonl" "$DEST/adam/tests/fixtures/" @@ -41,7 +43,7 @@ cp "$SRC/adam/tests/fixtures/seed-corrections.jsonl" "$DEST/ada echo " files installed." echo echo " next steps:" -echo " 1. bash $DEST/adam/tests/run-tests.sh # must show: 18 passed, 0 failed" +echo " 1. bash $DEST/adam/tests/run-tests.sh # must show: 21 passed, 0 failed" echo " 2. merge settings.json.example into $DEST/settings.json" echo " 3. start a fresh Claude Code session, then run /reflect" echo diff --git a/skills/adam-self-improvement/SKILL.md b/skills/adam-self-improvement/SKILL.md index b55dd1b..8386ce2 100644 --- a/skills/adam-self-improvement/SKILL.md +++ b/skills/adam-self-improvement/SKILL.md @@ -44,9 +44,10 @@ For each id in `high_confidence`: - Verify in front of the user: print `id`, `target`, `confidence`, `blast_radius`, `cross_session_evidence`, `auto_apply_eligible`. - Apply the change: - **For `skill_new`**: `mkdir -p ~/.claude/skills//`, then `Write` the proposal's `# Proposed change` body to `~/.claude/skills//SKILL.md`. After write, print: "skill `` written to `~/.claude/skills//SKILL.md` — activates immediately — Claude Code v2.1.0+ auto-hot-reloads user-level skills, no restart needed." 
- - **For `memory`**: `Write` the proposal's `# Proposed change` body to the path in `target` (under `~/.claude/projects//memory/`, where `` is the user's home dir with `/` replaced by `-`, e.g. `-Users-alice` on macOS). Then update `MEMORY.md` index with a one-line pointer. + - **For `memory`**: `Write` the proposal's `# Proposed change` body (which MUST include the auto-memory frontmatter — see "Memory drafting protocol" in `agents/adam.md`) to the path in `target` (under `~/.claude/projects//memory/`, where `` is the user's home dir with `/` replaced by `-`, e.g. `-Users-alice` on macOS). Then update `MEMORY.md` index with a one-line pointer. - **For other types under auto-apply**: apply via Write/Edit per `# Proposed change`. (Note: only `memory` and `skill_new` qualify for auto-apply per the rubric.) - Move proposal to `~/.claude/adam/applied/-.md`. +- **Archive consumed journal entries**: `node ~/.claude/adam/scripts/adam-archive.mjs ~/.claude/adam/applied/-.md` — moves entries listed in proposal's `source_entries` from `journal.jsonl` to `journal/actioned-.jsonl` so subsequent `/reflect` runs do not re-cluster them. Print: `auto-applied N proposals: [ids]`. @@ -61,10 +62,11 @@ c. On **approve**: - For `deletion`: `mkdir -p ~/.claude/adam/trash/` then `mv` the artifact into it. Print restoration command. - For `skill_new`: `mkdir -p ~/.claude/skills//`, then write `# Proposed change` body to `/SKILL.md`. Tell user: "skill `` written — activates immediately (CC v2.1.0+ auto-hot-reload)." - For `skill_edit`: apply the unified diff in `# Proposed change` to the existing SKILL.md at `target` (append-only — never replace existing content). - - For `memory`: write to `target` and update `MEMORY.md` index. + - For `memory`: write `# Proposed change` body (must include auto-memory frontmatter) to `target` and update `MEMORY.md` index with a one-line pointer. - For all others: apply via Write/Edit per the proposal's `# Proposed change`. 
- Move proposal to `~/.claude/adam/applied/-.md`. -d. On **reject**: ask for reason in one line. Append `# Reason\n` to proposal body. Move to `~/.claude/adam/rejected/.md`. + - Archive: `node ~/.claude/adam/scripts/adam-archive.mjs ~/.claude/adam/applied/-.md`. +d. On **reject**: ask for reason in one line. Append `# Reason\n` to proposal body. Move to `~/.claude/adam/rejected/.md`. Archive: `node ~/.claude/adam/scripts/adam-archive.mjs ~/.claude/adam/rejected/.md`. e. On **edit**: ask the user for the change, edit the proposal in place, then loop back to step 3a for that same id. ### 4. Handle failures @@ -95,6 +97,8 @@ Before writing any proposal: - For `deletion`: confirm both criteria (a) and (b) from the agent's special handling are documented in the proposal. - For `skill_new`: confirm the slug doesn't collide with any existing skill in `~/.claude/skills/`. If it does, refuse and ask user to rename. - For `skill_edit`: confirm the diff is append-only (no `-` lines that remove existing content) and that target SKILL.md exists. +- For `memory`: confirm `# Proposed change` body starts with `---` frontmatter containing required fields `name`, `description`, `type`, `originSessionId`. Refuse if frontmatter missing — agent must redraft per the Memory drafting protocol. +- Confirm `source_entries` is present in proposal frontmatter as a non-empty list (used for archive). Warn (do not refuse) if missing — legacy proposals from before v0.2.0 won't have it. If any check fails, refuse to apply and ask the user how to proceed.