Files
claude-mnemonic/internal/vector/hybrid/autotuner.go
lukaszraczylo 1ae8035470 feat(graph): add observation graph with hybrid vector storage
- [x] Add golangci.yml configuration with fieldalignment linter
- [x] Implement observation graph structure with edge detection
- [x] Add LEANN-inspired hybrid vector storage with hub threshold
- [x] Implement graph-aware search with selective recomputation
- [x] Add auto-tuner for dynamic hub threshold adjustment
- [x] Add graph and vector metrics tracking and reporting
- [x] Extend configuration for graph parameters
- [x] Add graph rebuild background service with periodic updates
- [x] Add HTTP endpoints for graph stats and vector metrics
- [x] Update UI with advanced metrics sidebar panel
- [x] Implement AST-aware code chunking for Go, Python, TypeScript
2026-01-07 18:51:40 +00:00

310 lines
7.4 KiB
Go

package hybrid
import (
	"context"
	"sort"
	"sync"
	"time"

	"github.com/lukaszraczylo/claude-mnemonic/internal/vector/sqlitevec"
	"github.com/rs/zerolog/log"
)
// AutoTuner dynamically adjusts the hub threshold of a hybrid Client based on
// observed query latency. Latencies are fed in through RecordQuery and
// evaluated periodically by a background goroutine launched by Start.
//
// NOTE(review): the field order is presumably chosen to satisfy the
// fieldalignment linter; confirm before reordering.
type AutoTuner struct {
	// ctx is cancelled by Stop to end the tuning goroutine.
	ctx context.Context
	// client is the hybrid client whose hubThreshold is read and updated.
	client *Client
	// cancel cancels ctx.
	cancel context.CancelFunc
	// latencies holds the most recent query latencies; guarded by latenciesMu.
	latencies []time.Duration
	// wg tracks the tuning goroutine so Stop can wait for it.
	wg sync.WaitGroup
	// queries counts every RecordQuery call; guarded by latenciesMu.
	queries int64
	// targetLatency is the p95 latency the tuner steers towards.
	targetLatency time.Duration
	// adjustPeriod is the interval between threshold evaluations.
	adjustPeriod time.Duration
	// minThreshold is the lower clamp applied when lowering the threshold.
	minThreshold int
	// maxThreshold is the upper clamp applied when raising the threshold.
	maxThreshold int
	// adjustments counts applied threshold changes; guarded by latenciesMu.
	adjustments int
	// latenciesMu guards latencies, queries and adjustments.
	latenciesMu sync.Mutex
}
// AutoTunerConfig configures the auto-tuner. The defaults quoted on each field
// are the values returned by DefaultAutoTunerConfig.
type AutoTunerConfig struct {
	TargetLatency time.Duration // Target p95 latency (default: 50ms)
	MinThreshold int // Min hub threshold (default: 2)
	MaxThreshold int // Max hub threshold (default: 20)
	AdjustPeriod time.Duration // Adjustment frequency (default: 5min)
}
// DefaultAutoTunerConfig returns the default tuning parameters: a 50ms p95
// latency target, a hub threshold range of 2 to 20, and a 5 minute adjustment
// period.
func DefaultAutoTunerConfig() AutoTunerConfig {
	return AutoTunerConfig{
		TargetLatency: 50 * time.Millisecond,
		MinThreshold:  2,
		MaxThreshold:  20,
		AdjustPeriod:  5 * time.Minute,
	}
}

// NewAutoTuner creates a new auto-tuner for the hybrid client. The tuner is
// idle until Start is called.
//
// Any non-positive field in cfg is replaced by the corresponding value from
// DefaultAutoTunerConfig, so a zero-value AutoTunerConfig yields a working
// tuner. This matters most for AdjustPeriod: time.NewTicker panics on a
// non-positive duration, which would otherwise crash the tuning goroutine.
// If MaxThreshold ends up below MinThreshold it is raised to MinThreshold so
// the clamping range is never inverted.
func NewAutoTuner(client *Client, cfg AutoTunerConfig) *AutoTuner {
	// cfg is a copy, so filling in defaults does not affect the caller's value.
	defaults := DefaultAutoTunerConfig()
	if cfg.TargetLatency <= 0 {
		cfg.TargetLatency = defaults.TargetLatency
	}
	if cfg.AdjustPeriod <= 0 {
		cfg.AdjustPeriod = defaults.AdjustPeriod
	}
	if cfg.MinThreshold <= 0 {
		cfg.MinThreshold = defaults.MinThreshold
	}
	if cfg.MaxThreshold <= 0 {
		cfg.MaxThreshold = defaults.MaxThreshold
	}
	if cfg.MaxThreshold < cfg.MinThreshold {
		cfg.MaxThreshold = cfg.MinThreshold
	}

	ctx, cancel := context.WithCancel(context.Background())
	return &AutoTuner{
		client:        client,
		targetLatency: cfg.TargetLatency,
		minThreshold:  cfg.MinThreshold,
		maxThreshold:  cfg.MaxThreshold,
		adjustPeriod:  cfg.AdjustPeriod,
		latencies:     make([]time.Duration, 0, 1000),
		ctx:           ctx,
		cancel:        cancel,
	}
}
// Start launches the background tuning goroutine and logs the settings the
// tuner is running with.
func (a *AutoTuner) Start() {
	// Register the goroutine before spawning it so Stop can wait on it.
	a.wg.Add(1)
	go a.tuningLoop()

	evt := log.Info()
	evt = evt.Dur("target_latency", a.targetLatency)
	evt = evt.Int("min_threshold", a.minThreshold)
	evt = evt.Int("max_threshold", a.maxThreshold)
	evt = evt.Dur("adjust_period", a.adjustPeriod)
	evt.Msg("Auto-tuner started")
}
// Stop cancels the tuner's context and blocks until the tuning goroutine
// launched by Start has returned.
func (a *AutoTuner) Stop() {
	// Cancelling the context makes tuningLoop return.
	a.cancel()
	// Wait for tuningLoop to signal completion via the wait group.
	a.wg.Wait()
	log.Info().Msg("Auto-tuner stopped")
}
// RecordQuery adds one query latency to the sample window and bumps the total
// query counter. Only the 1000 most recent samples are retained.
func (a *AutoTuner) RecordQuery(latency time.Duration) {
	const window = 1000

	a.latenciesMu.Lock()
	defer a.latenciesMu.Unlock()

	a.queries++
	a.latencies = append(a.latencies, latency)

	// Drop the oldest samples once the window is exceeded.
	if excess := len(a.latencies) - window; excess > 0 {
		a.latencies = a.latencies[excess:]
	}
}
// tuningLoop runs adjustThreshold once per adjustPeriod until the tuner's
// context is cancelled. It marks the wait group done on exit.
func (a *AutoTuner) tuningLoop() {
	defer a.wg.Done()

	tick := time.NewTicker(a.adjustPeriod)
	defer tick.Stop()

	stop := a.ctx.Done()
	for {
		select {
		case <-tick.C:
			a.adjustThreshold()
		case <-stop:
			return
		}
	}
}
// adjustThreshold compares the p95 of the recorded latencies with the target
// latency and moves the client's hub threshold accordingly:
//
//   - p95 above the target: lower the threshold, clamped to minThreshold.
//   - p95 below 80% of the target: raise the threshold, clamped to maxThreshold.
//   - otherwise: leave it unchanged.
//
// Nothing happens until at least 10 samples have been recorded. When a change
// is applied the sample window is cleared so the next evaluation only sees
// latencies measured under the new threshold. The "lowering"/"raising" log
// lines are emitted only when the threshold actually changes; when it is
// already pinned at a bound a debug line is logged instead.
func (a *AutoTuner) adjustThreshold() {
	a.latenciesMu.Lock()
	defer a.latenciesMu.Unlock()

	if len(a.latencies) < 10 {
		// Not enough data yet
		return
	}

	p95 := calculateP95(a.latencies)
	// NOTE(review): hubThreshold is read and written here while holding only
	// latenciesMu; confirm that Client tolerates concurrent access to it.
	currentThreshold := a.client.hubThreshold

	log.Debug().
		Dur("p95_latency", p95).
		Dur("target_latency", a.targetLatency).
		Int("current_threshold", currentThreshold).
		Int("queries", len(a.latencies)).
		Msg("Auto-tuner evaluating performance")

	// Decide the direction and the clamped target value.
	var (
		newThreshold int
		direction    string
	)
	switch {
	case p95 > a.targetLatency:
		// Too slow - lower threshold (more hubs = faster queries)
		newThreshold = currentThreshold - calculateAdjustment(p95, a.targetLatency)
		if newThreshold < a.minThreshold {
			newThreshold = a.minThreshold
		}
		direction = "Auto-tuner: Lowering hub threshold (too slow)"
	case p95 < a.targetLatency*8/10:
		// Too fast - raise threshold (fewer hubs = more savings).
		// Only adjust if significantly faster (20% margin).
		newThreshold = currentThreshold + calculateAdjustment(a.targetLatency, p95)
		if newThreshold > a.maxThreshold {
			newThreshold = a.maxThreshold
		}
		direction = "Auto-tuner: Raising hub threshold (room for savings)"
	default:
		// Within acceptable range, no adjustment needed
		log.Debug().
			Dur("p95", p95).
			Int("threshold", currentThreshold).
			Msg("Auto-tuner: Performance acceptable, no adjustment")
		return
	}

	if newThreshold == currentThreshold {
		// Already pinned at the bound: do not claim a change that did not happen.
		log.Debug().
			Dur("p95", p95).
			Int("threshold", currentThreshold).
			Msg("Auto-tuner: Hub threshold already at limit, no adjustment")
		return
	}

	log.Info().
		Dur("p95", p95).
		Int("old_threshold", currentThreshold).
		Int("new_threshold", newThreshold).
		Msg(direction)

	// Apply adjustment
	a.client.hubThreshold = newThreshold
	a.adjustments++
	// Clear latency history after adjustment
	a.latencies = make([]time.Duration, 0, 1000)

	log.Info().
		Int("threshold", newThreshold).
		Int("total_adjustments", a.adjustments).
		Msg("Hub threshold adjusted by auto-tuner")
}
// calculateP95 returns the 95th percentile of latencies, or 0 when the slice
// is empty. The input is not modified; a sorted copy is used.
//
// The percentile is the element at index int(len*0.95) of the ascending
// order, clamped to the last element.
func calculateP95(latencies []time.Duration) time.Duration {
	if len(latencies) == 0 {
		return 0
	}

	// Sort a copy so the caller's sample order is preserved.
	sorted := make([]time.Duration, len(latencies))
	copy(sorted, latencies)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })

	idx := int(float64(len(sorted)) * 0.95)
	if idx >= len(sorted) {
		idx = len(sorted) - 1
	}
	return sorted[idx]
}
// calculateAdjustment returns the step size (1, 2 or 3) for a threshold
// change, based on how far actual is from target relative to target:
// more than 50% off gives 3, more than 20% off gives 2, anything else 1.
func calculateAdjustment(actual, target time.Duration) int {
	// Relative deviation; only its magnitude matters for the step size.
	deviation := float64(actual-target) / float64(target)
	if deviation < 0 {
		deviation = -deviation
	}

	switch {
	case deviation > 0.5:
		return 3 // Large adjustment
	case deviation > 0.2:
		return 2 // Medium adjustment
	default:
		return 1 // Small adjustment
	}
}
// GetStats returns a snapshot of the tuner's state: the client's current hub
// threshold, the configured target, lifetime counters, and the p95 and mean
// latency over the current sample window. P95Latency and AvgLatency stay zero
// when the window is empty.
func (a *AutoTuner) GetStats() AutoTunerStats {
	a.latenciesMu.Lock()
	defer a.latenciesMu.Unlock()

	sampleCount := len(a.latencies)
	snapshot := AutoTunerStats{
		CurrentThreshold: a.client.hubThreshold,
		TargetLatency:    a.targetLatency,
		TotalQueries:     a.queries,
		TotalAdjustments: a.adjustments,
		RecentQueries:    sampleCount,
	}
	if sampleCount == 0 {
		return snapshot
	}

	var sum time.Duration
	for i := range a.latencies {
		sum += a.latencies[i]
	}
	snapshot.P95Latency = calculateP95(a.latencies)
	snapshot.AvgLatency = sum / time.Duration(sampleCount)
	return snapshot
}
// AutoTunerStats contains auto-tuner statistics as reported by
// AutoTuner.GetStats.
type AutoTunerStats struct {
	// CurrentThreshold is the client's hub threshold at snapshot time.
	CurrentThreshold int
	// TargetLatency is the configured p95 latency target.
	TargetLatency time.Duration
	// P95Latency is the 95th percentile over the current sample window
	// (zero when the window is empty).
	P95Latency time.Duration
	// AvgLatency is the mean over the current sample window
	// (zero when the window is empty).
	AvgLatency time.Duration
	// TotalQueries is the number of latencies recorded since creation.
	TotalQueries int64
	// TotalAdjustments is the number of threshold changes applied.
	TotalAdjustments int
	// RecentQueries is the number of samples currently in the window.
	RecentQueries int
}
// AutoTunedClient wraps Client with automatic performance tuning. It embeds
// *Client, so the client's methods are promoted; Query is redefined on the
// wrapper to record each call's latency with the tuner.
type AutoTunedClient struct {
	*Client
	// tuner receives query latencies and adjusts the client's hub threshold.
	tuner *AutoTuner
}
// Query forwards to the embedded Client's Query and reports the elapsed
// wall-clock time to the tuner. The latency is recorded whether or not the
// underlying call returned an error; results and error are passed through
// unchanged.
func (a *AutoTunedClient) Query(ctx context.Context, query string, limit int, where map[string]any) ([]sqlitevec.QueryResult, error) {
	begin := time.Now()
	results, err := a.Client.Query(ctx, query, limit, where)
	a.tuner.RecordQuery(time.Since(begin))
	return results, err
}
// WithAutoTuning returns client wrapped in an AutoTunedClient whose tuner has
// already been started with cfg. Call StopTuning on the result to shut the
// tuner down.
func WithAutoTuning(client *Client, cfg AutoTunerConfig) *AutoTunedClient {
	wrapped := &AutoTunedClient{
		Client: client,
		tuner:  NewAutoTuner(client, cfg),
	}
	wrapped.tuner.Start()
	return wrapped
}
// StopTuning stops the auto-tuner attached to this client by calling its Stop
// method. The wrapped Client itself is not touched here.
func (a *AutoTunedClient) StopTuning() {
	a.tuner.Stop()
}