mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-08 23:39:40 +00:00
d04b60517a
* Make things 'betterer' across the board * fix: reorganize struct fields and config parameters for consistency - [x] Reorder Config struct fields alphabetically and by related functionality - [x] Reorganize Observation model fields with archival fields grouped together - [x] Reorder ObservationStore fields to group related members - [x] Reorder Store struct fields with health check caching grouped - [x] Reorganize HealthInfo and PoolMetrics struct field order - [x] Reorder maintenance Service struct fields logically - [x] Reorganize MCP server handler parameter structs alphabetically - [x] Reorder pattern detector candidate tracking fields - [x] Reorganize search Manager struct fields by functionality - [x] Reorder vector Client struct fields with mutex protections grouped - [x] Reorganize handler request/response struct fields - [x] Update handlers_test.go to expect wrapped response format - [x] Reorder middleware TokenAuth and rate limiter fields - [x] Reorganize Service struct fields with grouped functionality - [x] Fix RateLimiter field ordering for clarity - [x] Reorder CircuitBreaker metrics fields * fix(security): improve JSON output safety and path traversal protection - [x] Replace unsafe JSON string formatting with proper json.Marshal in export handler - [x] Remove escapeJSONString helper function in favor of standard JSON marshaling - [x] Add safeResolvePath function to validate paths and prevent directory traversal - [x] Apply path traversal validation in captureFileMtimes operations - [x] Cap result slice capacity in getRecentSearchQueries to prevent DoS via excessive allocation * fix(sdk): improve path traversal protection and allocation safety - [x] Enhance safeResolvePath with stricter validation using filepath.Rel - [x] Reject paths containing ".." after cleaning to prevent traversal - [x] Validate absolute paths are within cwd when cwd is specified - [x] Apply safeResolvePath validation to GetFileContent for consistency - [x] Add comprehensive test coverage for path traversal protection - [x] Fix allocation safety in getRecentSearchQueries by using constant capacity
291 lines
7.5 KiB
Go
291 lines
7.5 KiB
Go
// Package maintenance provides scheduled maintenance tasks for claude-mnemonic.
|
|
package maintenance
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/lukaszraczylo/claude-mnemonic/internal/config"
|
|
"github.com/lukaszraczylo/claude-mnemonic/internal/db/gorm"
|
|
"github.com/rs/zerolog"
|
|
)
|
|
|
|
// Service handles scheduled maintenance tasks.
|
|
type Service struct {
|
|
log zerolog.Logger
|
|
lastRunTime time.Time
|
|
promptStore *gorm.PromptStore
|
|
store *gorm.Store
|
|
vectorCleanupFn func(ctx context.Context, deletedIDs []int64)
|
|
config *config.Config
|
|
summaryStore *gorm.SummaryStore
|
|
stopCh chan struct{}
|
|
doneCh chan struct{}
|
|
observationStore *gorm.ObservationStore
|
|
lastRunDuration time.Duration
|
|
totalCleanedObs int64
|
|
totalOptimizeRun int64
|
|
mu sync.Mutex
|
|
running bool
|
|
}
|
|
|
|
// NewService creates a new maintenance service.
|
|
func NewService(
|
|
store *gorm.Store,
|
|
observationStore *gorm.ObservationStore,
|
|
summaryStore *gorm.SummaryStore,
|
|
promptStore *gorm.PromptStore,
|
|
vectorCleanupFn func(ctx context.Context, deletedIDs []int64),
|
|
cfg *config.Config,
|
|
log zerolog.Logger,
|
|
) *Service {
|
|
return &Service{
|
|
store: store,
|
|
observationStore: observationStore,
|
|
summaryStore: summaryStore,
|
|
promptStore: promptStore,
|
|
vectorCleanupFn: vectorCleanupFn,
|
|
config: cfg,
|
|
log: log.With().Str("component", "maintenance").Logger(),
|
|
stopCh: make(chan struct{}),
|
|
doneCh: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Start begins the maintenance loop.
|
|
func (s *Service) Start(ctx context.Context) {
|
|
s.mu.Lock()
|
|
if s.running {
|
|
s.mu.Unlock()
|
|
return
|
|
}
|
|
s.running = true
|
|
s.mu.Unlock()
|
|
|
|
defer func() {
|
|
s.mu.Lock()
|
|
s.running = false
|
|
s.mu.Unlock()
|
|
close(s.doneCh)
|
|
}()
|
|
|
|
if !s.config.MaintenanceEnabled {
|
|
s.log.Info().Msg("Maintenance disabled, not starting scheduler")
|
|
return
|
|
}
|
|
|
|
interval := max(time.Duration(s.config.MaintenanceIntervalHours)*time.Hour, time.Hour)
|
|
|
|
s.log.Info().
|
|
Dur("interval", interval).
|
|
Int("retention_days", s.config.ObservationRetentionDays).
|
|
Bool("cleanup_stale", s.config.CleanupStaleObservations).
|
|
Msg("Starting maintenance scheduler")
|
|
|
|
// Initial run after 5 minutes (allow system to stabilize)
|
|
time.Sleep(5 * time.Minute)
|
|
s.runMaintenance(ctx)
|
|
|
|
ticker := time.NewTicker(interval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
s.log.Info().Msg("Maintenance shutting down due to context cancellation")
|
|
return
|
|
case <-s.stopCh:
|
|
s.log.Info().Msg("Maintenance shutting down due to stop signal")
|
|
return
|
|
case <-ticker.C:
|
|
s.runMaintenance(ctx)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop signals the maintenance service to stop.
|
|
func (s *Service) Stop() {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if !s.running {
|
|
return
|
|
}
|
|
|
|
close(s.stopCh)
|
|
}
|
|
|
|
// Wait waits for the maintenance service to finish.
|
|
func (s *Service) Wait() {
|
|
<-s.doneCh
|
|
}
|
|
|
|
// runMaintenance executes all maintenance tasks.
|
|
func (s *Service) runMaintenance(ctx context.Context) {
|
|
start := time.Now()
|
|
s.log.Info().Msg("Starting maintenance run")
|
|
|
|
var totalCleaned int64
|
|
|
|
// Task 1: Clean up old observations by age
|
|
if s.config.ObservationRetentionDays > 0 {
|
|
cleaned, err := s.cleanupOldObservations(ctx)
|
|
if err != nil {
|
|
s.log.Error().Err(err).Msg("Failed to cleanup old observations")
|
|
} else {
|
|
totalCleaned += cleaned
|
|
s.log.Info().Int64("cleaned", cleaned).Msg("Cleaned old observations by age")
|
|
}
|
|
}
|
|
|
|
// Task 2: Clean up stale observations
|
|
if s.config.CleanupStaleObservations {
|
|
cleaned, err := s.cleanupStaleObservations(ctx)
|
|
if err != nil {
|
|
s.log.Error().Err(err).Msg("Failed to cleanup stale observations")
|
|
} else {
|
|
totalCleaned += cleaned
|
|
s.log.Info().Int64("cleaned", cleaned).Msg("Cleaned stale observations")
|
|
}
|
|
}
|
|
|
|
// Task 3: Optimize database
|
|
if err := s.store.Optimize(ctx); err != nil {
|
|
s.log.Error().Err(err).Msg("Failed to optimize database")
|
|
} else {
|
|
s.totalOptimizeRun++
|
|
}
|
|
|
|
// Task 4: Clean up old prompts (keep last 1000 per session)
|
|
cleanedPrompts, err := s.cleanupOldPrompts(ctx)
|
|
if err != nil {
|
|
s.log.Error().Err(err).Msg("Failed to cleanup old prompts")
|
|
} else if cleanedPrompts > 0 {
|
|
s.log.Info().Int64("cleaned", cleanedPrompts).Msg("Cleaned old prompts")
|
|
}
|
|
|
|
// Update metrics
|
|
s.mu.Lock()
|
|
s.lastRunTime = time.Now()
|
|
s.lastRunDuration = time.Since(start)
|
|
s.totalCleanedObs += totalCleaned
|
|
s.mu.Unlock()
|
|
|
|
s.log.Info().
|
|
Dur("duration", time.Since(start)).
|
|
Int64("observations_cleaned", totalCleaned).
|
|
Msg("Maintenance run completed")
|
|
}
|
|
|
|
// cleanupOldObservations deletes observations older than the retention period.
|
|
func (s *Service) cleanupOldObservations(ctx context.Context) (int64, error) {
|
|
cutoffEpoch := time.Now().AddDate(0, 0, -s.config.ObservationRetentionDays).Unix()
|
|
|
|
// Get IDs of old observations
|
|
var deletedIDs []int64
|
|
err := s.store.GetDB().WithContext(ctx).
|
|
Model(&gorm.Observation{}).
|
|
Where("created_at_epoch < ?", cutoffEpoch).
|
|
Pluck("id", &deletedIDs).Error
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if len(deletedIDs) == 0 {
|
|
return 0, nil
|
|
}
|
|
|
|
// Delete in batches to avoid long transactions
|
|
batchSize := 100
|
|
for i := 0; i < len(deletedIDs); i += batchSize {
|
|
end := min(i+batchSize, len(deletedIDs))
|
|
batch := deletedIDs[i:end]
|
|
|
|
if err := s.store.GetDB().WithContext(ctx).
|
|
Where("id IN ?", batch).
|
|
Delete(&gorm.Observation{}).Error; err != nil {
|
|
return int64(i), err
|
|
}
|
|
|
|
// Sync vector DB deletions
|
|
if s.vectorCleanupFn != nil {
|
|
s.vectorCleanupFn(ctx, batch)
|
|
}
|
|
}
|
|
|
|
return int64(len(deletedIDs)), nil
|
|
}
|
|
|
|
// cleanupStaleObservations deletes observations marked as stale.
|
|
func (s *Service) cleanupStaleObservations(ctx context.Context) (int64, error) {
|
|
// Get IDs of stale observations (is_superseded = true)
|
|
var deletedIDs []int64
|
|
err := s.store.GetDB().WithContext(ctx).
|
|
Model(&gorm.Observation{}).
|
|
Where("is_superseded = ?", true).
|
|
Pluck("id", &deletedIDs).Error
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if len(deletedIDs) == 0 {
|
|
return 0, nil
|
|
}
|
|
|
|
// Delete in batches
|
|
batchSize := 100
|
|
for i := 0; i < len(deletedIDs); i += batchSize {
|
|
end := min(i+batchSize, len(deletedIDs))
|
|
batch := deletedIDs[i:end]
|
|
|
|
if err := s.store.GetDB().WithContext(ctx).
|
|
Where("id IN ?", batch).
|
|
Delete(&gorm.Observation{}).Error; err != nil {
|
|
return int64(i), err
|
|
}
|
|
|
|
// Sync vector DB deletions
|
|
if s.vectorCleanupFn != nil {
|
|
s.vectorCleanupFn(ctx, batch)
|
|
}
|
|
}
|
|
|
|
return int64(len(deletedIDs)), nil
|
|
}
|
|
|
|
// cleanupOldPrompts removes old prompts keeping only the most recent per session.
|
|
func (s *Service) cleanupOldPrompts(ctx context.Context) (int64, error) {
|
|
// Delete prompts older than 30 days that aren't the most recent in their session
|
|
cutoffEpoch := time.Now().AddDate(0, 0, -30).Unix()
|
|
|
|
result := s.store.GetDB().WithContext(ctx).
|
|
Where("created_at_epoch < ?", cutoffEpoch).
|
|
Delete(&gorm.UserPrompt{})
|
|
|
|
return result.RowsAffected, result.Error
|
|
}
|
|
|
|
// Stats returns maintenance statistics.
|
|
func (s *Service) Stats() map[string]any {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
return map[string]any{
|
|
"enabled": s.config.MaintenanceEnabled,
|
|
"interval_hours": s.config.MaintenanceIntervalHours,
|
|
"retention_days": s.config.ObservationRetentionDays,
|
|
"cleanup_stale": s.config.CleanupStaleObservations,
|
|
"last_run": s.lastRunTime,
|
|
"last_duration_ms": s.lastRunDuration.Milliseconds(),
|
|
"total_cleaned_obs": s.totalCleanedObs,
|
|
"total_optimizes": s.totalOptimizeRun,
|
|
"running": s.running,
|
|
}
|
|
}
|
|
|
|
// RunNow triggers an immediate maintenance run.
|
|
func (s *Service) RunNow(ctx context.Context) {
|
|
go s.runMaintenance(ctx)
|
|
}
|