Files
lukaszraczylo f07875ee82 fix: plugin no longer vanishes after Claude Code updates
Root cause: plugin registered as directory source in known_marketplaces.json,
which gets wiped on CLI updates. Now registers in extraKnownMarketplaces
(settings.json) as a GitHub source — same mechanism caveman/context-mode use.

Binaries install to ~/.claude-mnemonic/bin/ instead of the Claude-managed
plugins directory. Thin wrapper scripts in the repo let the marketplace
clone find them. Nothing gets cleaned up when Claude refreshes its cache.

Also fixed along the way:
- ONNX Runtime 1.24.3 → 1.26.0 (API v25 mismatch broke all embedding tests)
- Vector client leaked on DB reinit, processQueue had a race on sessionManager
- reloadConfig called os.Exit(0) bypassing graceful shutdown
- Removed dead QueryRowWithTimeout that leaked contexts
- Added tests for graph/watcher/maintenance/update (all were at 0%)
2026-05-24 01:56:54 +01:00

728 lines
19 KiB
Go

//go:build fts5
// Package maintenance provides scheduled maintenance tasks for claude-mnemonic.
package maintenance
import (
"context"
"os"
"path/filepath"
"sync"
"testing"
"time"
"github.com/rs/zerolog"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gorm.io/gorm/logger"
"github.com/lukaszraczylo/claude-mnemonic/internal/config"
gormdb "github.com/lukaszraczylo/claude-mnemonic/internal/db/gorm"
"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
)
// testSetup builds a maintenance Service backed by a real database file that
// lives in a per-test temporary directory.
//
// It returns the service, the underlying store, the observation and prompt
// stores that were wired into the service, and a cleanup function that closes
// the store. Callers should defer the cleanup function. The temporary
// directory is owned by the testing framework (t.TempDir) and is removed
// automatically after the test and its cleanups have finished, so neither the
// success nor the failure path needs a manual RemoveAll.
//
// The service is constructed with a nil vector-cleanup callback and a no-op
// logger; tests that need a callback must build their own Service.
func testSetup(t *testing.T, cfg *config.Config) (*Service, *gormdb.Store, *gormdb.ObservationStore, *gormdb.PromptStore, func()) {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "test.db")

	store, err := gormdb.NewStore(gormdb.Config{
		Path:     dbPath,
		MaxConns: 4,
		LogLevel: logger.Silent,
	})
	if err != nil {
		t.Fatalf("NewStore failed: %v", err)
	}

	observationStore := gormdb.NewObservationStore(store, nil, nil, nil)
	summaryStore := gormdb.NewSummaryStore(store)
	promptStore := gormdb.NewPromptStore(store, nil)

	svc := NewService(store, observationStore, summaryStore, promptStore, nil, cfg, zerolog.Nop())

	// Only the store needs explicit teardown; the directory is handled by
	// t.TempDir's own cleanup, which runs after this function has been called.
	cleanup := func() {
		store.Close()
	}
	return svc, store, observationStore, promptStore, cleanup
}
// defaultCfg returns the baseline configuration shared by these tests. It
// starts from config.Default() and then turns maintenance on with a one-hour
// interval, while leaving both cleanup features off (retention of 0 days and
// no stale-observation cleanup) so individual tests can opt in explicitly.
func defaultCfg() *config.Config {
	base := config.Default()

	// Scheduling knobs.
	base.MaintenanceEnabled, base.MaintenanceIntervalHours = true, 1

	// Cleanup knobs: both off by default.
	base.ObservationRetentionDays, base.CleanupStaleObservations = 0, false

	return base
}
// insertObservation stores a minimal discovery-type observation titled
// "test observation" through obsStore and returns the ID reported by
// StoreObservation. Any store error fails the calling test immediately.
func insertObservation(t *testing.T, obsStore *gormdb.ObservationStore, session, project string, seq int) int64 {
	t.Helper()

	parsed := models.ParsedObservation{
		Type:  models.ObsTypeDiscovery,
		Title: "test observation",
	}

	// NOTE(review): the trailing 10 is passed through verbatim; its meaning is
	// defined by StoreObservation, which is not visible from this file.
	newID, _, storeErr := obsStore.StoreObservation(context.Background(), session, project, &parsed, seq, 10)
	require.NoError(t, storeErr)

	return newID
}
// ---- NewService ----

// TestNewService_ReturnsNonNilService checks that the service produced by
// testSetup (and therefore by NewService) is not nil.
func TestNewService_ReturnsNonNilService(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	assert.NotNil(t, service)
}
// TestNewService_InitializesChannels checks that a freshly constructed service
// already has its stopCh and doneCh fields populated.
func TestNewService_InitializesChannels(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	// Both lifecycle channels have to exist up front, otherwise Stop/Wait
	// would be operating on nil channels.
	assert.NotNil(t, service.stopCh)
	assert.NotNil(t, service.doneCh)
}
// ---- Stats ----

// TestStats_DefaultValues checks the Stats() snapshot of a service that was
// built from defaultCfg() and has never run: config-derived entries mirror
// defaultCfg, counters are zero and the service is not reported as running.
func TestStats_DefaultValues(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	snapshot := service.Stats()

	// A slice (not a map) keeps the assertion order stable. The expected
	// values carry the exact dynamic types the assertions compare against
	// (int for config values, int64 for counters).
	expected := []struct {
		key  string
		want interface{}
	}{
		{"enabled", true},
		{"interval_hours", 1},
		{"retention_days", 0},
		{"cleanup_stale", false},
		{"total_cleaned_obs", int64(0)},
		{"total_optimizes", int64(0)},
		{"running", false},
	}
	for _, e := range expected {
		assert.Equal(t, e.want, snapshot[e.key])
	}
}
func TestStats_ReflectsConfigFields(t *testing.T) {
tests := []struct {
name string
cfg *config.Config
wantEnabled bool
wantHours int
wantDays int
wantStale bool
}{
{
name: "maintenance disabled",
cfg: func() *config.Config {
c := defaultCfg()
c.MaintenanceEnabled = false
return c
}(),
wantEnabled: false,
wantHours: 1,
wantDays: 0,
wantStale: false,
},
{
name: "retention and stale cleanup enabled",
cfg: func() *config.Config {
c := defaultCfg()
c.ObservationRetentionDays = 30
c.CleanupStaleObservations = true
c.MaintenanceIntervalHours = 12
return c
}(),
wantEnabled: true,
wantHours: 12,
wantDays: 30,
wantStale: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
svc, _, _, _, cleanup := testSetup(t, tt.cfg)
defer cleanup()
stats := svc.Stats()
assert.Equal(t, tt.wantEnabled, stats["enabled"])
assert.Equal(t, tt.wantHours, stats["interval_hours"])
assert.Equal(t, tt.wantDays, stats["retention_days"])
assert.Equal(t, tt.wantStale, stats["cleanup_stale"])
})
}
}
// ---- Stop (idempotency) ----

// TestStop_WhenNotRunning_DoesNotPanic checks that calling Stop on a service
// whose Start was never invoked does not panic.
func TestStop_WhenNotRunning_DoesNotPanic(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	// Start is never called in this test, so Stop has nothing to shut down
	// and is expected to behave as a no-op.
	stopOnce := func() { service.Stop() }
	assert.NotPanics(t, stopOnce)
}
// TestStop_CalledTwice_DoesNotPanic checks that Stop can be called repeatedly
// on a service that has already finished without panicking (for example from
// a double close of an internal channel).
//
// Maintenance is disabled so that Start returns straight away; Wait then
// blocks until Start has exited. Stop is subsequently invoked twice — the
// second call is what makes this test live up to its name.
func TestStop_CalledTwice_DoesNotPanic(t *testing.T) {
	cfg := defaultCfg()
	cfg.MaintenanceEnabled = false
	svc, _, _, _, cleanup := testSetup(t, cfg)
	defer cleanup()

	go svc.Start(context.Background())
	svc.Wait() // returns once Start has exited

	// Stop after Wait — must not panic or double-close, no matter how many
	// times it is called.
	assert.NotPanics(t, func() { svc.Stop() }, "first Stop after Wait")
	assert.NotPanics(t, func() { svc.Stop() }, "second Stop after Wait")
}
// ---- Start / running flag ----

// TestStart_MaintenanceDisabled_ExitsImmediately checks that, with maintenance
// disabled, Start finishes quickly enough for Wait to return within two
// seconds, and that Stats() afterwards reports the service as not running.
func TestStart_MaintenanceDisabled_ExitsImmediately(t *testing.T) {
	cfg := defaultCfg()
	cfg.MaintenanceEnabled = false
	service, _, _, _, teardown := testSetup(t, cfg)
	t.Cleanup(teardown)

	go service.Start(context.Background())

	// Wait is run on its own goroutine so the test can bound it with a timer.
	waited := make(chan struct{})
	go func() {
		service.Wait()
		close(waited)
	}()

	deadline := time.NewTimer(2 * time.Second)
	defer deadline.Stop()

	select {
	case <-deadline.C:
		t.Fatal("Start() did not return promptly when maintenance is disabled")
	case <-waited:
		// Start returned without blocking.
	}

	assert.Equal(t, false, service.Stats()["running"])
}
// TestStart_StopSignal_ExitsCleanly checks that Wait returns within two
// seconds once Start has exited, and that a subsequent Stop does not panic.
//
// The original author notes that the real ticker/stop path is hard to exercise
// because Start sleeps for five minutes before entering its loop, so this test
// runs with maintenance disabled (the immediate-exit path) instead.
func TestStart_StopSignal_ExitsCleanly(t *testing.T) {
	cfg := defaultCfg()
	cfg.MaintenanceEnabled = false
	service, _, _, _, teardown := testSetup(t, cfg)
	t.Cleanup(teardown)

	go service.Start(context.Background())

	// Signal on a channel when Wait comes back so it can be raced against a
	// timeout.
	waited := make(chan struct{})
	go func() {
		service.Wait()
		close(waited)
	}()

	deadline := time.NewTimer(2 * time.Second)
	defer deadline.Stop()

	select {
	case <-deadline.C:
		t.Fatal("Wait() did not return after Start exited")
	case <-waited:
		// Start exited and Wait observed it.
	}

	// With the service already finished, Stop is expected to be a harmless
	// no-op.
	stopAfterWait := func() { service.Stop() }
	assert.NotPanics(t, stopAfterWait)
}
// TestStart_DoubleStart_SecondCallIsNoOp runs Start once with maintenance
// disabled, waits for it to exit, and then checks that the service's running
// flag is false again.
//
// NOTE(review): despite its name, this test never invokes Start a second time;
// it only inspects the running flag after the first Start has finished.
// Whether a second Start on the same service is safe (doneCh has already been
// used) cannot be determined from this file — confirm against Service.Start
// before extending the test.
func TestStart_DoubleStart_SecondCallIsNoOp(t *testing.T) {
	cfg := defaultCfg()
	cfg.MaintenanceEnabled = false // Start exits immediately
	service, _, _, _, teardown := testSetup(t, cfg)
	t.Cleanup(teardown)

	// First (and only) Start call; Wait blocks until it has exited.
	go service.Start(context.Background())
	service.Wait()

	// Read the flag under the service mutex; the read itself must not panic.
	assert.NotPanics(t, func() {
		service.mu.Lock()
		stillRunning := service.running
		service.mu.Unlock()

		assert.False(t, stillRunning)
	})
}
// ---- RunNow ----

// TestRunNow_UpdatesLastRunTime checks that after RunNow the service's
// lastRunTime is not earlier than a timestamp taken just before the call.
func TestRunNow_UpdatesLastRunTime(t *testing.T) {
	cfg := defaultCfg()
	cfg.ObservationRetentionDays = 0
	cfg.CleanupStaleObservations = false
	service, _, _, _, teardown := testSetup(t, cfg)
	t.Cleanup(teardown)

	startedAt := time.Now()
	service.RunNow(context.Background())

	// The maintenance work is given a fixed grace period to finish
	// (presumably it runs on a background goroutine).
	time.Sleep(200 * time.Millisecond)

	service.mu.Lock()
	recorded := service.lastRunTime
	service.mu.Unlock()

	// "Not before" is the same condition as "after or equal".
	assert.False(t, recorded.Before(startedAt),
		"lastRunTime should be updated after RunNow")
}
// TestRunNow_IncrementsOptimizeCounter checks that a single RunNow leaves the
// service's totalOptimizeRun counter at exactly 1.
func TestRunNow_IncrementsOptimizeCounter(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	service.RunNow(context.Background())
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	// Snapshot the counter under the service mutex.
	var runs int64
	func() {
		service.mu.Lock()
		defer service.mu.Unlock()
		runs = service.totalOptimizeRun
	}()

	assert.Equal(t, int64(1), runs)
}
// TestRunNow_StatsTotalOptimizesReflected checks that after one RunNow the
// "total_optimizes" entry of Stats() is int64(1).
func TestRunNow_StatsTotalOptimizesReflected(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	service.RunNow(context.Background())
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	reported := service.Stats()["total_optimizes"]
	assert.Equal(t, int64(1), reported)
}
// ---- cleanupOldObservations (via RunNow) ----

// TestRunNow_RetentionDaysZero_NothingDeleted inserts five observations, runs
// maintenance with ObservationRetentionDays set to 0, and checks that all five
// are still returned and that totalCleanedObs stayed at zero.
func TestRunNow_RetentionDaysZero_NothingDeleted(t *testing.T) {
	cfg := defaultCfg()
	cfg.ObservationRetentionDays = 0
	service, _, observations, _, teardown := testSetup(t, cfg)
	t.Cleanup(teardown)

	const inserted = 5
	for seq := 0; seq < inserted; seq++ {
		insertObservation(t, observations, "session-1", "proj", seq)
	}

	service.RunNow(context.Background())
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	survivors, err := observations.GetRecentObservations(context.Background(), "proj", 20)
	require.NoError(t, err)
	assert.Len(t, survivors, inserted, "nothing should be deleted when retention_days = 0")

	service.mu.Lock()
	removed := service.totalCleanedObs
	service.mu.Unlock()
	assert.Equal(t, int64(0), removed)
}
// TestRunNow_RetentionDays_DeletesExpiredObservations checks that with a
// one-day retention window a maintenance run removes an observation whose
// CreatedAtEpoch is two days old, keeps one created just now, and records the
// deletion in totalCleanedObs.
//
// The row count is read with an explicit error check so that a failing query
// is reported as such instead of surfacing as a misleading count mismatch.
func TestRunNow_RetentionDays_DeletesExpiredObservations(t *testing.T) {
	cfg := defaultCfg()
	cfg.ObservationRetentionDays = 1 // keep only the last day
	svc, store, _, _, cleanup := testSetup(t, cfg)
	defer cleanup()

	ctx := context.Background()
	now := time.Now()

	// Expired observation: epoch back-dated to two days ago.
	obs := &gormdb.Observation{
		SDKSessionID:    "old-session",
		Project:         "proj",
		Type:            models.ObsTypeDiscovery,
		CreatedAt:       "2000-01-01T00:00:00Z",
		CreatedAtEpoch:  now.AddDate(0, 0, -2).Unix(),
		Scope:           models.ScopeProject,
		ImportanceScore: 1.0,
	}
	require.NoError(t, store.GetDB().WithContext(ctx).Create(obs).Error)

	// Recent observation: must survive the run.
	recentObs := &gormdb.Observation{
		SDKSessionID:    "new-session",
		Project:         "proj",
		Type:            models.ObsTypeDiscovery,
		CreatedAt:       now.Format(time.RFC3339),
		CreatedAtEpoch:  now.Unix(),
		Scope:           models.ScopeProject,
		ImportanceScore: 1.0,
	}
	require.NoError(t, store.GetDB().WithContext(ctx).Create(recentObs).Error)

	svc.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	// Only the recent observation should remain.
	var count int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Count(&count).Error)
	assert.Equal(t, int64(1), count, "expired observation should have been deleted")

	svc.mu.Lock()
	cleaned := svc.totalCleanedObs
	svc.mu.Unlock()
	assert.Equal(t, int64(1), cleaned)
}
// TestRunNow_RetentionDays_VectorCleanupCalled checks that when a maintenance
// run deletes an expired observation, the vector-cleanup callback handed to
// NewService receives that observation's ID.
//
// The service is assembled by hand here (rather than via testSetup) because
// testSetup always passes a nil callback.
func TestRunNow_RetentionDays_VectorCleanupCalled(t *testing.T) {
	cfg := defaultCfg()
	cfg.ObservationRetentionDays = 1

	// Callback that records every ID it is given; guarded by a mutex because
	// it may be invoked from a goroutine other than the test's.
	var (
		seenMu sync.Mutex
		seen   []int64
	)
	recordIDs := func(_ context.Context, ids []int64) {
		seenMu.Lock()
		seen = append(seen, ids...)
		seenMu.Unlock()
	}

	workDir, err := os.MkdirTemp("", "maintenance_vec_test_*")
	require.NoError(t, err)
	defer os.RemoveAll(workDir)

	store, err := gormdb.NewStore(gormdb.Config{
		Path:     filepath.Join(workDir, "test.db"),
		MaxConns: 4,
		LogLevel: logger.Silent,
	})
	require.NoError(t, err)
	defer store.Close() // runs before the directory is removed

	service := NewService(
		store,
		gormdb.NewObservationStore(store, nil, nil, nil),
		gormdb.NewSummaryStore(store),
		gormdb.NewPromptStore(store, nil),
		recordIDs,
		cfg,
		zerolog.Nop(),
	)

	ctx := context.Background()

	// Expired observation written straight through the database handle.
	expired := &gormdb.Observation{
		SDKSessionID:    "session-x",
		Project:         "proj",
		Type:            models.ObsTypeDiscovery,
		CreatedAt:       "2000-01-01T00:00:00Z",
		CreatedAtEpoch:  time.Now().AddDate(0, 0, -2).Unix(),
		Scope:           models.ScopeProject,
		ImportanceScore: 1.0,
	}
	require.NoError(t, store.GetDB().WithContext(ctx).Create(expired).Error)

	service.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	seenMu.Lock()
	got := seen
	seenMu.Unlock()

	assert.NotEmpty(t, got, "vector cleanup callback must be called with deleted IDs")
	assert.Contains(t, got, expired.ID)
}
// ---- cleanupStaleObservations (via RunNow) ----

// TestRunNow_CleanupStale_DeletesSupersededObservations checks that with
// CleanupStaleObservations enabled a maintenance run deletes an observation
// flagged is_superseded = 1 while leaving an unflagged one in place.
//
// Both count queries are checked for errors. Without that, a failing query
// would leave the counter at its zero value and the "stale row is gone"
// assertion would pass without proving anything.
func TestRunNow_CleanupStale_DeletesSupersededObservations(t *testing.T) {
	cfg := defaultCfg()
	cfg.CleanupStaleObservations = true
	cfg.ObservationRetentionDays = 0
	svc, store, obsStore, _, cleanup := testSetup(t, cfg)
	defer cleanup()

	ctx := context.Background()

	// Active observation: must survive.
	activeID := insertObservation(t, obsStore, "session-1", "proj", 1)

	// Stale observation: inserted normally, then flagged as superseded.
	staleID := insertObservation(t, obsStore, "session-1", "proj", 2)
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Where("id = ?", staleID).
		Update("is_superseded", 1).Error)

	svc.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	var activeCount int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Where("id = ?", activeID).
		Count(&activeCount).Error)
	assert.Equal(t, int64(1), activeCount, "active observation must not be deleted")

	var staleCount int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Where("id = ?", staleID).
		Count(&staleCount).Error)
	assert.Equal(t, int64(0), staleCount, "stale observation must be deleted")
}
// TestRunNow_CleanupStale_DisabledLeavesStaleObservations checks that with
// CleanupStaleObservations disabled a maintenance run leaves an observation
// flagged is_superseded = 1 in the database.
//
// The count query is checked for errors so that a failing query is reported
// directly rather than as a confusing "expected 1, got 0".
func TestRunNow_CleanupStale_DisabledLeavesStaleObservations(t *testing.T) {
	cfg := defaultCfg()
	cfg.CleanupStaleObservations = false
	svc, store, obsStore, _, cleanup := testSetup(t, cfg)
	defer cleanup()

	ctx := context.Background()

	// Insert an observation and flag it as superseded.
	staleID := insertObservation(t, obsStore, "session-1", "proj", 1)
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Where("id = ?", staleID).
		Update("is_superseded", 1).Error)

	svc.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	var count int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Where("id = ?", staleID).
		Count(&count).Error)
	assert.Equal(t, int64(1), count, "stale observation must survive when cleanup_stale is false")
}
// TestRunNow_CleanupStale_NoStaleRows_NothingChanged inserts three ordinary
// observations, runs maintenance with stale cleanup enabled, and checks that
// all three are still returned afterwards.
func TestRunNow_CleanupStale_NoStaleRows_NothingChanged(t *testing.T) {
	cfg := defaultCfg()
	cfg.CleanupStaleObservations = true
	service, _, observations, _, teardown := testSetup(t, cfg)
	t.Cleanup(teardown)

	ctx := context.Background()

	// None of these rows is flagged as superseded.
	const inserted = 3
	for seq := 0; seq < inserted; seq++ {
		insertObservation(t, observations, "session-1", "proj", seq)
	}

	service.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	survivors, err := observations.GetRecentObservations(ctx, "proj", 20)
	require.NoError(t, err)
	assert.Len(t, survivors, inserted)
}
// ---- cleanupOldPrompts (via RunNow) ----

// TestRunNow_CleanupOldPrompts_DeletesExpiredPrompts checks that a maintenance
// run removes a user prompt whose CreatedAtEpoch is 31 days old while keeping
// one created just now.
//
// The count query is checked for errors so that a failing query is reported
// directly instead of as a count mismatch.
func TestRunNow_CleanupOldPrompts_DeletesExpiredPrompts(t *testing.T) {
	svc, store, _, _, cleanup := testSetup(t, defaultCfg())
	defer cleanup()

	ctx := context.Background()
	now := time.Now()

	// Old prompt: epoch back-dated by 31 days.
	oldPrompt := &gormdb.UserPrompt{
		ClaudeSessionID: "session-old",
		PromptText:      "old prompt",
		PromptNumber:    1,
		CreatedAt:       "2000-01-01T00:00:00Z",
		CreatedAtEpoch:  now.AddDate(0, 0, -31).Unix(),
	}
	require.NoError(t, store.GetDB().WithContext(ctx).Create(oldPrompt).Error)

	// Recent prompt: must survive the run.
	recentPrompt := &gormdb.UserPrompt{
		ClaudeSessionID: "session-new",
		PromptText:      "recent prompt",
		PromptNumber:    1,
		CreatedAt:       now.Format(time.RFC3339),
		CreatedAtEpoch:  now.Unix(),
	}
	require.NoError(t, store.GetDB().WithContext(ctx).Create(recentPrompt).Error)

	svc.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	var count int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.UserPrompt{}).
		Count(&count).Error)
	assert.Equal(t, int64(1), count, "only the recent prompt should survive")
}
// TestRunNow_CleanupOldPrompts_NothingExpired_AllSurvive saves five prompts
// through the prompt store, runs maintenance, and checks that all five rows
// are still present.
//
// The count query is checked for errors so that a failing query is reported
// directly instead of as a count mismatch.
func TestRunNow_CleanupOldPrompts_NothingExpired_AllSurvive(t *testing.T) {
	svc, store, _, promptStore, cleanup := testSetup(t, defaultCfg())
	defer cleanup()

	ctx := context.Background()

	// Prompt numbers run 1..5 within a single session.
	for i := 1; i <= 5; i++ {
		_, err := promptStore.SaveUserPromptWithMatches(ctx, "session-1", i, "prompt", 1)
		require.NoError(t, err)
	}

	svc.RunNow(ctx)
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	var count int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.UserPrompt{}).
		Count(&count).Error)
	assert.Equal(t, int64(5), count, "no prompts should be deleted when none are expired")
}
// ---- Stats race safety ----

// TestStats_ConcurrentAccess_NoRace calls Stats() from twenty goroutines at
// once. It makes no assertions of its own; it is only meaningful when the
// tests are run under the race detector.
func TestStats_ConcurrentAccess_NoRace(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	const readers = 20

	var pending sync.WaitGroup
	pending.Add(readers)
	for r := 0; r < readers; r++ {
		go func() {
			defer pending.Done()
			_ = service.Stats()
		}()
	}
	pending.Wait()
}
// ---- RunNow concurrent safety ----

// TestRunNow_ConcurrentCalls_NoRace invokes RunNow from five goroutines at
// once and then pauses for half a second. It makes no assertions of its own;
// it is only meaningful when the tests are run under the race detector.
func TestRunNow_ConcurrentCalls_NoRace(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	const callers = 5
	ctx := context.Background()

	var pending sync.WaitGroup
	pending.Add(callers)
	for c := 0; c < callers; c++ {
		go func() {
			defer pending.Done()
			service.RunNow(ctx)
		}()
	}
	pending.Wait()

	// Grace period for any work RunNow may have left running before teardown
	// closes the store.
	time.Sleep(500 * time.Millisecond)
}
// ---- lastRunDuration is populated ----

// TestRunNow_LastRunDuration_IsPopulated checks that after one RunNow the
// service's lastRunDuration is strictly greater than zero.
func TestRunNow_LastRunDuration_IsPopulated(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	service.RunNow(context.Background())
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	// Snapshot the duration under the service mutex, already converted to the
	// plain integer the assertion compares.
	var elapsed int64
	func() {
		service.mu.Lock()
		defer service.mu.Unlock()
		elapsed = int64(service.lastRunDuration)
	}()

	assert.Greater(t, elapsed, int64(0), "lastRunDuration should be set after a maintenance run")
}
// TestStats_LastDurationMs_IsPopulated checks that after one RunNow the
// Stats() map has a "last_duration_ms" entry. Only the key's presence is
// verified, since a very fast run could legitimately report 0.
func TestStats_LastDurationMs_IsPopulated(t *testing.T) {
	service, _, _, _, teardown := testSetup(t, defaultCfg())
	t.Cleanup(teardown)

	service.RunNow(context.Background())
	time.Sleep(300 * time.Millisecond) // grace period for the run to finish

	// For a map, Contains looks the value up among the keys.
	assert.Contains(t, service.Stats(), "last_duration_ms", "stats must contain last_duration_ms key")
}
// ---- Batch deletion boundary ----

// TestRunNow_RetentionDays_BatchDeletion_MoreThan100Rows inserts 150 expired
// observations (more than one batch, which the original author states is 100
// rows) and checks that a single maintenance run removes all of them and
// reports 150 in totalCleanedObs.
//
// The count query is checked for errors. Without that, a failing query would
// leave the counter at zero and the "nothing remains" assertion would pass
// without proving anything.
func TestRunNow_RetentionDays_BatchDeletion_MoreThan100Rows(t *testing.T) {
	cfg := defaultCfg()
	cfg.ObservationRetentionDays = 1
	svc, store, _, _, cleanup := testSetup(t, cfg)
	defer cleanup()

	ctx := context.Background()

	// Every row shares the same back-dated epoch, so compute it once.
	const expiredRows = 150
	expiredEpoch := time.Now().AddDate(0, 0, -2).Unix()
	for i := 0; i < expiredRows; i++ {
		obs := &gormdb.Observation{
			SDKSessionID:    "session-old",
			Project:         "proj",
			Type:            models.ObsTypeDiscovery,
			CreatedAt:       "2000-01-01T00:00:00Z",
			CreatedAtEpoch:  expiredEpoch,
			Scope:           models.ScopeProject,
			ImportanceScore: 1.0,
		}
		require.NoError(t, store.GetDB().WithContext(ctx).Create(obs).Error)
	}

	svc.RunNow(ctx)
	time.Sleep(500 * time.Millisecond) // grace period for the run to finish

	var remaining int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Count(&remaining).Error)
	assert.Equal(t, int64(0), remaining, "all 150 expired observations should be deleted in batches")

	svc.mu.Lock()
	cleaned := svc.totalCleanedObs
	svc.mu.Unlock()
	assert.Equal(t, int64(expiredRows), cleaned)
}
// TestRunNow_CleanupStale_BatchDeletion_MoreThan100Rows inserts 120
// observations flagged IsSuperseded = 1 and checks that, with stale cleanup
// enabled, a single maintenance run leaves no superseded rows behind.
//
// The count query is checked for errors. Without that, a failing query would
// leave the counter at zero and the "nothing remains" assertion would pass
// without proving anything.
func TestRunNow_CleanupStale_BatchDeletion_MoreThan100Rows(t *testing.T) {
	cfg := defaultCfg()
	cfg.CleanupStaleObservations = true
	svc, store, _, _, cleanup := testSetup(t, cfg)
	defer cleanup()

	ctx := context.Background()

	// Every row shares the same creation time, so capture it once.
	const staleRows = 120
	now := time.Now()
	createdAt, createdAtEpoch := now.Format(time.RFC3339), now.Unix()
	for i := 0; i < staleRows; i++ {
		obs := &gormdb.Observation{
			SDKSessionID:    "session-stale",
			Project:         "proj",
			Type:            models.ObsTypeDiscovery,
			CreatedAt:       createdAt,
			CreatedAtEpoch:  createdAtEpoch,
			Scope:           models.ScopeProject,
			ImportanceScore: 1.0,
			IsSuperseded:    1,
		}
		require.NoError(t, store.GetDB().WithContext(ctx).Create(obs).Error)
	}

	svc.RunNow(ctx)
	time.Sleep(500 * time.Millisecond) // grace period for the run to finish

	var remaining int64
	require.NoError(t, store.GetDB().WithContext(ctx).
		Model(&gormdb.Observation{}).
		Where("is_superseded = ?", 1).
		Count(&remaining).Error)
	assert.Equal(t, int64(0), remaining, "all 120 stale observations should be deleted in batches")
}