Files
compaction-mcp/store.go
T
lukaszraczylo dded4ec04c Add release infrastructure and complete implementation
- Dockerfile: distroless container for MCP server
- GoReleaser: multi-platform binary and Docker builds with cosign signing
- GitHub Actions: release workflow using shared actions
- Semver config for automatic version calculation
- Persistence layer, content indexing, and improved tool handlers
2026-03-07 18:31:00 +00:00

653 lines
14 KiB
Go

package main
import (
"crypto/rand"
"errors"
"fmt"
"math"
"sort"
"strings"
"sync"
"time"
)
// Hard limits applied by the store.
const (
// maxItems is the number of stored items at which Add starts rejecting new ones.
maxItems = 10000
// maxContentBytes is the largest content size, in bytes, that Add accepts.
maxContentBytes = 1 << 20 // 1 MiB
// maxDedupCandidates bounds how many items the deduplication phase of a
// full compaction compares against each other.
maxDedupCandidates = 500
)
// Item is one stored entry together with the metadata used for scoring,
// querying and compaction.
type Item struct {
// CreatedAt is set when the item is added; ListItems and Export order by it.
CreatedAt time.Time
// AccessedAt is refreshed by Get and Query and drives the recency part of the score.
AccessedAt time.Time
// Content is the stored text. Compaction may replace it with Summary.
Content string
// Summary is an optional shorter form of Content; empty when none was provided.
Summary string
// ID is the key of the item in the store (generated by newID in Add).
ID string
// Tags label the item for tag-filtered queries; matching is case-insensitive.
Tags []string
// Tokens is the EstimateTokens value of Content, counted against the budget.
Tokens int
// AccessCount is incremented by Get and Query.
AccessCount int
// Importance is the caller-supplied weight; Add keeps it in the range 1..10.
Importance int
// ContentType is the DetectContentType result for the original content.
ContentType ContentType
// Pinned items get the maximum score and are skipped by summary promotion
// and eviction.
Pinned bool
}
// SummaryCandidate describes an item that has no summary yet and is large
// enough that compaction reports it as worth summarizing.
type SummaryCandidate struct {
// ID identifies the item in the store.
ID string
// Preview is the leading part of the item's content (at most 80 bytes).
Preview string
// Tokens is the item's current token estimate.
Tokens int
}
// CompactResult reports what a compaction run did.
type CompactResult struct {
// NeedsSummary lists remaining items without a summary whose token count exceeds 100.
NeedsSummary []SummaryCandidate
// TokensFreed is the total number of tokens released by all phases.
TokensFreed int
// TokensBefore is the used-token count when compaction started.
TokensBefore int
// TokensAfter is the used-token count when compaction finished.
TokensAfter int
// Evicted counts items removed by the eviction phase.
Evicted int
// Summarized counts items whose content was replaced by their summary.
Summarized int
// Deduplicated counts items removed as near-duplicates of another item.
Deduplicated int
}
// BulkItem is the input for one entry of BulkAdd; its fields are passed to
// Add unchanged.
type BulkItem struct {
// Content is the text to store.
Content string
// Summary is an optional shorter form of Content.
Summary string
// Tags are optional labels; Add derives tags itself when this is empty.
Tags []string
// Importance is the requested weight; Add normalizes out-of-range values.
Importance int
}
// Store is a token-budgeted collection of items. The exported methods take
// mu themselves; methods with a "Locked" suffix are called with mu already
// held.
type Store struct {
// items maps item ID to the stored item.
items map[string]*Item
// index is the search index queried by Query and kept in step with items.
index *Index
// tokenBudget is the token capacity; values <= 0 disable usage-based logic.
tokenBudget int
// usedTokens is the running sum of Tokens over the stored items.
usedTokens int
// autoCompactThreshold is the usage fraction above which Add triggers compaction.
autoCompactThreshold float64
// mu guards every field of the store.
mu sync.Mutex
// autoCompact enables the compaction check at the end of Add.
autoCompact bool
}
// NewStore creates an empty store with the given token budget. Automatic
// compaction starts enabled with a trigger threshold of 90% usage.
func NewStore(tokenBudget int) *Store {
	st := new(Store)
	st.items = make(map[string]*Item)
	st.index = NewIndex()
	st.tokenBudget = tokenBudget
	st.autoCompact = true
	st.autoCompactThreshold = 0.9
	return st
}
// Add stores content as a new item and returns a snapshot of it.
//
// Parameters:
//   - content: the text to store; rejected when longer than maxContentBytes.
//   - summary: optional shorter form, used later by compaction.
//   - tags: labels for the item; when empty, AutoTags(content) supplies them.
//   - importance: values below 1 become 5, values above 10 become 10.
//
// An error is returned when the store already holds maxItems items or the
// content is too large. After a successful insert, when auto-compaction is
// enabled and usage exceeds the configured threshold, a light compaction
// towards 80% usage runs before returning.
//
// The returned *Item is a copy taken while the lock is held, so callers can
// read it without racing against Get, Query or compaction, which mutate the
// stored item.
//
// NOTE(review): the compaction target is fixed at 0.8, so a configured
// threshold below 0.8 triggers runs that free nothing until usage passes
// 80% — confirm whether the target should follow the threshold.
// NOTE(review): auto-compaction may evict the item that was just added (for
// example a very large, low-importance one); the snapshot is still returned.
func (s *Store) Add(content, summary string, tags []string, importance int) (*Item, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if len(s.items) >= maxItems {
		return nil, errors.New("item limit reached (max 10000)")
	}
	if len(content) > maxContentBytes {
		return nil, fmt.Errorf("content too large (%d bytes, max %d)", len(content), maxContentBytes)
	}

	switch {
	case importance < 1:
		importance = 5
	case importance > 10:
		importance = 10
	}

	ct := DetectContentType(content)
	if len(tags) == 0 {
		tags = AutoTags(content)
	}
	tokens := EstimateTokens(content)

	// A colliding ID would overwrite an existing entry without adjusting
	// usedTokens, so regenerate until the ID is unused.
	id := newID()
	for {
		if _, taken := s.items[id]; !taken {
			break
		}
		id = newID()
	}

	// One timestamp so CreatedAt and AccessedAt start out identical.
	now := time.Now()
	item := &Item{
		ID:          id,
		Content:     content,
		Summary:     summary,
		Tags:        tags,
		Importance:  importance,
		ContentType: ct,
		CreatedAt:   now,
		AccessedAt:  now,
		Tokens:      tokens,
	}
	s.items[item.ID] = item
	s.usedTokens += tokens
	s.index.Add(item.ID, content, tags)

	if s.autoCompact && s.tokenBudget > 0 {
		if float64(s.usedTokens)/float64(s.tokenBudget) > s.autoCompactThreshold {
			s.compactLocked(0.8, false)
		}
	}

	// Copy after compaction so the snapshot reflects any summary promotion.
	snapshot := *item
	return &snapshot, nil
}
// Get returns a copy of the item with the given ID and true, or a zero Item
// and false when the ID is unknown. A successful lookup counts as an access:
// it refreshes AccessedAt and increments AccessCount before copying.
func (s *Store) Get(id string) (Item, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	found, exists := s.items[id]
	if !exists {
		return Item{}, false
	}
	found.AccessedAt = time.Now()
	found.AccessCount++
	return *found, true
}
// Remove deletes the item with the given ID from the store and the index and
// gives its tokens back to the budget. It reports whether the ID existed.
func (s *Store) Remove(id string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	if victim, exists := s.items[id]; exists {
		s.usedTokens -= victim.Tokens
		s.index.Remove(id)
		delete(s.items, id)
		return true
	}
	return false
}
// Pin marks the item with the given ID as pinned and reports whether the ID
// exists.
func (s *Store) Pin(id string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	target, found := s.items[id]
	if !found {
		return false
	}
	target.Pinned = true
	return true
}
// Unpin clears the pinned flag of the item with the given ID and reports
// whether the ID exists.
func (s *Store) Unpin(id string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	target, found := s.items[id]
	if !found {
		return false
	}
	target.Pinned = false
	return true
}
// UpdateSummary replaces the summary of the item with the given ID and
// reports whether the ID exists. Content and token count are left as they are.
func (s *Store) UpdateSummary(id, summary string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	target, found := s.items[id]
	if !found {
		return false
	}
	target.Summary = summary
	return true
}
// Query returns copies of up to limit items, best match first.
//
// With a non-empty query the index is searched and each hit is ranked by its
// search score plus the item's retention score. With an empty query every
// item is ranked by retention score alone. When tags is non-empty, only items
// carrying at least one of them (case-insensitive) are considered. A limit
// of zero or less means 10.
//
// Only the returned items are treated as accessed: their AccessedAt and
// AccessCount are updated before they are copied out.
func (s *Store) Query(query string, tags []string, limit int) []Item {
	s.mu.Lock()
	defer s.mu.Unlock()

	if limit <= 0 {
		limit = 10
	}

	type hit struct {
		item  *Item
		score float64
	}
	// tagOK applies the optional tag filter.
	tagOK := func(it *Item) bool {
		return len(tags) == 0 || hasAnyTag(it, tags)
	}

	var hits []hit
	switch query {
	case "":
		// No query text: rank every tag-matching item by retention score.
		for _, it := range s.items {
			if tagOK(it) {
				hits = append(hits, hit{item: it, score: s.scoreLocked(it)})
			}
		}
	default:
		// Search relevance and retention score are summed.
		for _, sr := range s.index.Search(query, 0) {
			it, known := s.items[sr.ID]
			if !known || !tagOK(it) {
				continue
			}
			hits = append(hits, hit{item: it, score: sr.Score + s.scoreLocked(it)})
		}
	}

	sort.Slice(hits, func(a, b int) bool {
		return hits[a].score > hits[b].score
	})
	if len(hits) > limit {
		hits = hits[:limit]
	}

	// Access bookkeeping happens after truncation so dropped hits are untouched.
	out := make([]Item, 0, len(hits))
	for _, h := range hits {
		h.item.AccessedAt = time.Now()
		h.item.AccessCount++
		out = append(out, *h.item)
	}
	return out
}
// ListItems returns one page of item copies, newest first, together with the
// total number of stored items.
//
// A negative offset is treated as 0 and a limit of zero or less as 20. When
// offset is at or past the end, the page is nil. Listing does not count as an
// access. Items with identical creation times are ordered by ID so that
// consecutive pages neither repeat nor skip entries.
func (s *Store) ListItems(offset, limit int) ([]Item, int) {
	s.mu.Lock()
	defer s.mu.Unlock()

	total := len(s.items)
	if offset < 0 {
		offset = 0
	}
	if limit <= 0 {
		limit = 20
	}
	if offset >= total {
		return nil, total
	}

	all := make([]*Item, 0, total)
	for _, item := range s.items {
		all = append(all, item)
	}
	sort.Slice(all, func(i, j int) bool {
		if !all[i].CreatedAt.Equal(all[j].CreatedAt) {
			return all[i].CreatedAt.After(all[j].CreatedAt)
		}
		return all[i].ID < all[j].ID
	})

	// Clamp limit against what is left instead of computing offset+limit
	// first, which overflows for very large limits.
	if remaining := total - offset; limit > remaining {
		limit = remaining
	}
	page := all[offset : offset+limit]
	out := make([]Item, len(page))
	for i, item := range page {
		out[i] = *item
	}
	return out, total
}
// BulkAdd adds each entry through Add, in order. Both returned slices have
// the same length as items: position i holds the stored item, or nil plus a
// non-nil error when that entry was rejected. One failure does not stop the
// remaining entries.
func (s *Store) BulkAdd(items []BulkItem) ([]*Item, []error) {
	n := len(items)
	added, failures := make([]*Item, n), make([]error, n)
	for idx := range items {
		entry := &items[idx]
		added[idx], failures[idx] = s.Add(entry.Content, entry.Summary, entry.Tags, entry.Importance)
	}
	return added, failures
}
// Export returns copies of all items, oldest first. With summariesOnly set,
// each copy that has a summary carries the summary as its Content and a token
// count re-estimated for it; items without a summary are exported unchanged.
// The stored items themselves are not modified.
func (s *Store) Export(summariesOnly bool) []Item {
	s.mu.Lock()
	defer s.mu.Unlock()

	snapshot := make([]Item, 0, len(s.items))
	for _, src := range s.items {
		entry := *src
		if summariesOnly && entry.Summary != "" {
			entry.Content = entry.Summary
			entry.Tokens = EstimateTokens(entry.Content)
		}
		snapshot = append(snapshot, entry)
	}
	sort.Slice(snapshot, func(a, b int) bool {
		return snapshot[a].CreatedAt.Before(snapshot[b].CreatedAt)
	})
	return snapshot
}
// Recall reports the store status and the highest-scoring items in a single
// call, intended for restoring working context at session start.
//
// It returns the token budget, tokens in use, item count, usage as a fraction
// of the budget (0 when the budget is not positive) and copies of up to limit
// items ordered by retention score, best first. items is nil when the store
// is empty or limit is not positive. Recall does not update access metadata.
func (s *Store) Recall(limit int) (budget, used, count int, usage float64, items []Item) {
	s.mu.Lock()
	defer s.mu.Unlock()

	budget, used, count = s.tokenBudget, s.usedTokens, len(s.items)
	if budget > 0 {
		usage = float64(used) / float64(budget)
	}
	if count == 0 || limit <= 0 {
		return budget, used, count, usage, nil
	}

	type ranked struct {
		item  *Item
		score float64
	}
	order := make([]ranked, 0, count)
	for _, it := range s.items {
		order = append(order, ranked{item: it, score: s.scoreLocked(it)})
	}
	sort.Slice(order, func(a, b int) bool {
		return order[a].score > order[b].score
	})

	if limit > len(order) {
		limit = len(order)
	}
	items = make([]Item, 0, limit)
	for _, r := range order[:limit] {
		items = append(items, *r.item)
	}
	return budget, used, count, usage, items
}
// scoreLocked computes the retention score of item; higher means more worth
// keeping. Pinned items always score math.MaxFloat64. Otherwise the score is
//
//	0.4*importance + 0.3*recency + 0.2*access - 0.1*sizePenalty
//
// where importance is Importance/10 scaled by the content type's multiplier,
// recency decays exponentially with the minutes since the last access,
// access grows logarithmically with AccessCount and is capped at 1, and
// sizePenalty is the item's share of the token budget (0 when the budget is
// not positive). The caller must hold s.mu.
func (s *Store) scoreLocked(item *Item) float64 {
	if item.Pinned {
		return math.MaxFloat64
	}

	decay := DecayHalfLifeMinutes(item.ContentType)
	idleMinutes := time.Since(item.AccessedAt).Minutes()
	recency := math.Exp(-idleMinutes / decay)

	importance := float64(item.Importance) / 10.0
	importance *= ScoreMultiplier(item.ContentType)

	access := math.Min(math.Log1p(float64(item.AccessCount))/5.0, 1.0)

	sizePenalty := 0.0
	if s.tokenBudget > 0 {
		sizePenalty = float64(item.Tokens) / float64(s.tokenBudget)
	}

	return (0.4 * importance) + (0.3 * recency) + (0.2 * access) - (0.1 * sizePenalty)
}
// Status returns the token budget, the tokens currently in use, the number
// of stored items and the usage as a fraction of the budget. usage is 0 when
// the budget is not positive.
func (s *Store) Status() (budget, used, count int, usage float64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	budget, used, count = s.tokenBudget, s.usedTokens, len(s.items)
	if budget <= 0 {
		return budget, used, count, 0
	}
	return budget, used, count, float64(used) / float64(budget)
}
// BudgetTight reports whether more than 80% of the token budget is in use.
// It is always false when the budget is not positive.
func (s *Store) BudgetTight() bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	budget := s.tokenBudget
	return budget > 0 && float64(s.usedTokens)/float64(budget) > 0.8
}
// Configure updates the store settings. Each argument is applied only when
// it carries a usable value and is otherwise ignored:
//   - budget: must be greater than 0.
//   - autoCompact: must be non-nil; the pointed-to value is stored.
//   - threshold: must be in the range (0, 1].
func (s *Store) Configure(budget int, autoCompact *bool, threshold float64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if enabled := autoCompact; enabled != nil {
		s.autoCompact = *enabled
	}
	if threshold > 0 && threshold <= 1.0 {
		s.autoCompactThreshold = threshold
	}
	if budget > 0 {
		s.tokenBudget = budget
	}
}
// AutoCompact reports whether Add runs compaction automatically.
func (s *Store) AutoCompact() bool {
	s.mu.Lock()
	enabled := s.autoCompact
	s.mu.Unlock()
	return enabled
}
// AutoCompactThreshold returns the usage fraction above which Add triggers
// automatic compaction.
func (s *Store) AutoCompactThreshold() float64 {
	s.mu.Lock()
	threshold := s.autoCompactThreshold
	s.mu.Unlock()
	return threshold
}
// Compact runs a full compaction (including deduplication) towards
// targetUsage, expressed as a fraction of the token budget, and returns what
// was done.
func (s *Store) Compact(targetUsage float64) CompactResult {
	s.mu.Lock()
	defer s.mu.Unlock()

	const full = true
	return s.compactLocked(targetUsage, full)
}
// compactLocked reduces token usage towards targetUsage (a fraction of the
// token budget) and reports what it did. The caller must hold s.mu.
//
// Nothing happens when the budget is not positive or usage is already at or
// below the target. Otherwise the phases run in this order:
//
//  1. Summary promotion: lowest-scoring unpinned items with a summary have
//     their content replaced by it, until the target is reached.
//  2. Deduplication (only when fullCompaction is true): items whose word
//     sets have a Jaccard similarity above 0.7 are merged.
//  3. Eviction: lowest-scoring unpinned items are removed until the target
//     is reached.
//
// Finally the remaining items that lack a summary and exceed 100 tokens are
// listed in NeedsSummary.
func (s *Store) compactLocked(targetUsage float64, fullCompaction bool) CompactResult {
	result := CompactResult{TokensBefore: s.usedTokens, TokensAfter: s.usedTokens}
	if s.tokenBudget <= 0 {
		return result
	}
	targetTokens := int(float64(s.tokenBudget) * targetUsage)
	if s.usedTokens <= targetTokens {
		return result
	}

	s.promoteSummariesLocked(targetTokens, &result)
	if fullCompaction {
		s.dedupLocked(&result)
	}
	if s.usedTokens > targetTokens {
		s.evictLocked(targetTokens, &result)
	}
	result.NeedsSummary = s.summaryCandidatesLocked()
	result.TokensAfter = s.usedTokens
	return result
}

// promoteSummariesLocked replaces content with the summary on unpinned items,
// lowest score first, until usage reaches targetTokens.
func (s *Store) promoteSummariesLocked(targetTokens int, result *CompactResult) {
	type candidate struct {
		item  *Item
		score float64
	}
	var candidates []candidate
	for _, item := range s.items {
		if item.Pinned || item.Summary == "" || item.Content == item.Summary {
			continue
		}
		candidates = append(candidates, candidate{item: item, score: s.scoreLocked(item)})
	}
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].score < candidates[j].score
	})

	for _, c := range candidates {
		if s.usedTokens <= targetTokens {
			break
		}
		item := c.item
		summaryTokens := EstimateTokens(item.Summary)
		saved := item.Tokens - summaryTokens
		if saved <= 0 {
			// The summary is not smaller: keep the original content.
			// Overwriting here would lose content for no gain and leave
			// usedTokens out of step with the item's token count.
			continue
		}
		item.Content = item.Summary
		item.Tokens = summaryTokens
		s.usedTokens -= saved
		result.Summarized++
		result.TokensFreed += saved
		// NOTE(review): assumes Index.Add replaces an existing entry for the
		// same ID — confirm against the Index implementation.
		s.index.Add(item.ID, item.Content, item.Tags)
	}
}

// dedupLocked merges near-duplicate items: of each pair with more than 70%
// word overlap, the higher-scoring item survives and inherits the other's
// tags and, if it has none, its summary.
func (s *Store) dedupLocked(result *CompactResult) {
	type candidate struct {
		id    string
		item  *Item
		score float64
		words map[string]struct{} // built on first use
	}

	// Score every item once; scores do not depend on anything dedup changes.
	cands := make([]candidate, 0, len(s.items))
	for id, item := range s.items {
		cands = append(cands, candidate{id: id, item: item, score: s.scoreLocked(item)})
	}
	if len(cands) > maxDedupCandidates {
		// Keep the lowest-scoring items so low-value entries are merged first.
		sort.Slice(cands, func(i, j int) bool {
			return cands[i].score < cands[j].score
		})
		cands = cands[:maxDedupCandidates]
	}

	// Content is not modified during dedup, so each word set is built at most once.
	wordsOf := func(c *candidate) map[string]struct{} {
		if c.words == nil {
			c.words = wordSet(c.item.Content)
		}
		return c.words
	}

	removed := make([]bool, len(cands))
	for i := range cands {
		a := &cands[i]
		if removed[i] || a.item.Pinned {
			continue
		}
		for j := i + 1; j < len(cands); j++ {
			if removed[j] {
				continue
			}
			b := &cands[j]
			if !(jaccardSimilarity(wordsOf(a), wordsOf(b)) > 0.7) {
				continue
			}

			// The higher score survives; a pinned item is never the one dropped.
			keep, drop, dropIdx := a, b, j
			if b.item.Pinned || !(a.score >= b.score) {
				keep, drop, dropIdx = b, a, i
			}

			// NOTE(review): the index entry of the survivor is not refreshed
			// with the merged tags — confirm whether tag search needs that.
			keep.item.Tags = mergeTags(keep.item.Tags, drop.item.Tags)
			if keep.item.Summary == "" && drop.item.Summary != "" {
				keep.item.Summary = drop.item.Summary
			}
			s.usedTokens -= drop.item.Tokens
			result.TokensFreed += drop.item.Tokens
			s.index.Remove(drop.id)
			delete(s.items, drop.id)
			removed[dropIdx] = true
			result.Deduplicated++

			if dropIdx == i {
				// a itself is gone; nothing further to compare against it.
				break
			}
		}
	}
}

// evictLocked removes unpinned items, lowest score first, until usage
// reaches targetTokens.
func (s *Store) evictLocked(targetTokens int, result *CompactResult) {
	type candidate struct {
		item  *Item
		score float64
	}
	evictable := make([]candidate, 0, len(s.items))
	for _, item := range s.items {
		if !item.Pinned {
			evictable = append(evictable, candidate{item: item, score: s.scoreLocked(item)})
		}
	}
	sort.Slice(evictable, func(i, j int) bool {
		return evictable[i].score < evictable[j].score
	})

	for _, c := range evictable {
		if s.usedTokens <= targetTokens {
			break
		}
		s.usedTokens -= c.item.Tokens
		result.TokensFreed += c.item.Tokens
		s.index.Remove(c.item.ID)
		delete(s.items, c.item.ID)
		result.Evicted++
	}
}

// summaryCandidatesLocked lists the items that have no summary and more than
// 100 tokens, largest first (ties by ID), each with a short content preview.
func (s *Store) summaryCandidatesLocked() []SummaryCandidate {
	const previewBytes = 80

	var out []SummaryCandidate
	for _, item := range s.items {
		if item.Summary != "" || item.Tokens <= 100 {
			continue
		}
		preview := item.Content
		if len(preview) > previewBytes {
			cut := previewBytes
			// Back up while the first excluded byte is a UTF-8 continuation
			// byte (10xxxxxx) so a multi-byte character is not split. A
			// sequence has at most three continuation bytes.
			for cut > previewBytes-3 && preview[cut]&0xC0 == 0x80 {
				cut--
			}
			preview = preview[:cut]
		}
		out = append(out, SummaryCandidate{
			ID:      item.ID,
			Tokens:  item.Tokens,
			Preview: preview,
		})
	}
	// Map iteration order is random; sort for a stable, useful order.
	sort.Slice(out, func(i, j int) bool {
		if out[i].Tokens != out[j].Tokens {
			return out[i].Tokens > out[j].Tokens
		}
		return out[i].ID < out[j].ID
	})
	return out
}
// --- helpers ---
// newID returns a random identifier: 8 bytes from crypto/rand rendered as 16
// lowercase hexadecimal characters. It panics when the random source fails.
func newID() string {
	var raw [8]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		panic("crypto/rand: " + err.Error())
	}
	return fmt.Sprintf("%x", raw[:])
}
// wordSet lowercases s, splits it on whitespace and returns the distinct
// words as a set. The result is an empty, non-nil map when s has no words.
func wordSet(s string) map[string]struct{} {
	tokens := strings.Fields(strings.ToLower(s))
	unique := make(map[string]struct{}, len(tokens))
	for i := range tokens {
		unique[tokens[i]] = struct{}{}
	}
	return unique
}
// jaccardSimilarity returns |a ∩ b| / |a ∪ b| for two word sets, a value in
// [0, 1]. Two empty sets yield 0.
func jaccardSimilarity(a, b map[string]struct{}) float64 {
	combined := len(a) + len(b)
	if combined == 0 {
		return 0
	}

	shared := 0
	for word := range a {
		if _, both := b[word]; both {
			shared++
		}
	}

	if union := combined - shared; union != 0 {
		return float64(shared) / float64(union)
	}
	return 0
}
// hasAnyTag reports whether item carries at least one of the given tags,
// comparing case-insensitively. It is false when either side has no tags.
func hasAnyTag(item *Item, tags []string) bool {
	wanted := make(map[string]struct{}, len(tags))
	for _, tag := range tags {
		wanted[strings.ToLower(tag)] = struct{}{}
	}
	for _, own := range item.Tags {
		if _, hit := wanted[strings.ToLower(own)]; hit {
			return true
		}
	}
	return false
}
// mergeTags returns the tags of a followed by those of b, dropping any tag
// that already appeared when compared case-insensitively. The first spelling
// seen is the one kept. The result is nil when both inputs are empty.
func mergeTags(a, b []string) []string {
	var merged []string
	seen := make(map[string]struct{})
	for _, group := range [][]string{a, b} {
		for _, tag := range group {
			key := strings.ToLower(tag)
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}
			merged = append(merged, tag)
		}
	}
	return merged
}