mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-05 23:03:55 +00:00
a1d4459329
march-improvements
64 lines
1.9 KiB
Go
64 lines
1.9 KiB
Go
// Package sanitize provides content-cleaning utilities for stripping
// Claude Code internal XML artifacts from captured text.
|
|
package sanitize
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// systemXMLTags enumerates the names of the Claude Code internal XML tags whose
// blocks are removed from captured content before it is processed. Left in
// place, these system-level artifacts would pollute stored observations and
// summaries.
//
// Entries are bare tag names (no angle brackets); one pattern is compiled per
// entry, in this order.
var systemXMLTags = []string{
	"task-notification",        // Claude Code task/agent system
	"system-reminder",          // system reminders injected by Claude Code
	"relevant-memory",          // claude-mnemonic's own context injection
	"user-prompt-submit-hook",  // hook output wrappers
	"persisted-output",         // large output persistence
	"available-deferred-tools", // tool loading system
	"fast_mode_info",           // fast mode info
	"antml_thinking",           // Anthropic internal
	"antml_function_calls",     // Anthropic internal
}
|
|
|
|
// systemXMLRegexps are compiled regexps for each tag, built once at init.
|
|
var systemXMLRegexps []*regexp.Regexp
|
|
|
|
func init() {
|
|
systemXMLRegexps = make([]*regexp.Regexp, len(systemXMLTags))
|
|
for i, tag := range systemXMLTags {
|
|
// Match opening tag (with optional attributes), content (including newlines), and closing tag
|
|
systemXMLRegexps[i] = regexp.MustCompile(`(?s)<` + regexp.QuoteMeta(tag) + `[^>]*>.*?</` + regexp.QuoteMeta(tag) + `>`)
|
|
}
|
|
}
|
|
|
|
// StripSystemXML removes known Claude Code internal XML blocks from text.
|
|
// This prevents system artifacts like <task-notification>, <system-reminder>,
|
|
// and <relevant-memory> from being stored in observations and summaries.
|
|
func StripSystemXML(s string) string {
|
|
// Quick check: if no angle brackets, nothing to strip
|
|
if !strings.Contains(s, "<") {
|
|
return s
|
|
}
|
|
|
|
for _, re := range systemXMLRegexps {
|
|
s = re.ReplaceAllString(s, "")
|
|
}
|
|
|
|
// Clean up resulting double-blank-lines from removed blocks
|
|
for strings.Contains(s, "\n\n\n") {
|
|
s = strings.ReplaceAll(s, "\n\n\n", "\n\n")
|
|
}
|
|
|
|
return strings.TrimSpace(s)
|
|
}
|