Files
lukaszraczylo d04b60517a Make things 'betterer' across the board (#23)
* Make things 'betterer' across the board

* fix: reorganize struct fields and config parameters for consistency

- [x] Reorder Config struct fields alphabetically and by related functionality
- [x] Reorganize Observation model fields with archival fields grouped together
- [x] Reorder ObservationStore fields to group related members
- [x] Reorder Store struct fields with health check caching grouped
- [x] Reorganize HealthInfo and PoolMetrics struct field order
- [x] Reorder maintenance Service struct fields logically
- [x] Reorganize MCP server handler parameter structs alphabetically
- [x] Reorder pattern detector candidate tracking fields
- [x] Reorganize search Manager struct fields by functionality
- [x] Reorder vector Client struct fields with mutex protections grouped
- [x] Reorganize handler request/response struct fields
- [x] Update handlers_test.go to expect wrapped response format
- [x] Reorder middleware TokenAuth and rate limiter fields
- [x] Reorganize Service struct fields with grouped functionality
- [x] Fix RateLimiter field ordering for clarity
- [x] Reorder CircuitBreaker metrics fields

* fix(security): improve JSON output safety and path traversal protection

- [x] Replace unsafe JSON string formatting with proper json.Marshal in export handler
- [x] Remove escapeJSONString helper function in favor of standard JSON marshaling
- [x] Add safeResolvePath function to validate paths and prevent directory traversal
- [x] Apply path traversal validation in captureFileMtimes operations
- [x] Cap result slice capacity in getRecentSearchQueries to prevent DoS via excessive allocation

* fix(sdk): improve path traversal protection and allocation safety

- [x] Enhance safeResolvePath with stricter validation using filepath.Rel
- [x] Reject paths containing ".." after cleaning to prevent traversal
- [x] Validate absolute paths are within cwd when cwd is specified
- [x] Apply safeResolvePath validation to GetFileContent for consistency
- [x] Add comprehensive test coverage for path traversal protection
- [x] Fix allocation safety in getRecentSearchQueries by using constant capacity
2026-01-11 01:51:20 +00:00

140 lines
4.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Package scoring provides importance score calculation for observations.
package scoring
import (
"math"
"time"
"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
)
// Calculator computes importance scores for observations.
// Every calculation reads its weights and thresholds from the scoring
// configuration held by the calculator.
type Calculator struct {
// config is the active scoring configuration; it is set by NewCalculator
// and can be swapped at runtime through UpdateConfig.
config *models.ScoringConfig
}
// NewCalculator builds a Calculator around the supplied scoring configuration.
// When config is nil, the result of models.DefaultScoringConfig() is used instead.
func NewCalculator(config *models.ScoringConfig) *Calculator {
	calc := &Calculator{config: config}
	if calc.config == nil {
		calc.config = models.DefaultScoringConfig()
	}
	return calc
}
// Calculate returns the importance score of obs as of the instant now.
//
// It evaluates CalculateComponents and hands back only the FinalScore field.
// The score is assembled as:
//
//	FinalScore = (BaseScore × TypeWeight × RecencyDecay) + FeedbackContrib + ConceptContrib + RetrievalContrib
//
// Terms:
//   - BaseScore = 1.0
//   - TypeWeight = observation type multiplier (e.g., bugfix=1.3, change=0.9)
//   - RecencyDecay = 0.5^(age_days / half_life_days) - halves every 7 days by default
//   - FeedbackContrib = user_feedback × feedback_weight
//   - ConceptContrib = sum(concept_weights) × concept_weight_factor
//   - RetrievalContrib = log2(retrieval_count + 1) × 0.1 × retrieval_weight
//
// A sum that falls below the configured MinScore is raised to MinScore.
func (c *Calculator) Calculate(obs *models.Observation, now time.Time) float64 {
	components := c.CalculateComponents(obs, now)
	return components.FinalScore
}
// CalculateComponents returns the individual components of the importance
// score of obs as of the instant now.
// Useful for debugging and explaining scores to users.
// This is the core calculation method - Calculate() delegates to this.
//
// The returned ScoreComponents holds:
//   - TypeWeight: models.TypeBaseScore(obs.Type)
//   - AgeDays: days between obs.CreatedAtEpoch (Unix milliseconds) and now,
//     clamped to 0 for timestamps in the future
//   - RecencyDecay: 0.5^(AgeDays / RecencyHalfLifeDays)
//   - CoreScore: 1.0 × TypeWeight × RecencyDecay
//   - FeedbackContrib: obs.UserFeedback × FeedbackWeight
//   - ConceptContrib: sum of ConceptWeights entries matching obs.Concepts × ConceptWeight
//   - RetrievalContrib: log2(obs.RetrievalCount + 1) × 0.1 × RetrievalWeight,
//     or 0 when the count is not positive
//   - FinalScore: the sum of the four contributions, raised to MinScore when lower
//
// An observation whose age is zero is reported as undecayed (RecencyDecay = 1)
// without dividing by the half-life. Otherwise a zero RecencyHalfLifeDays would
// evaluate 0/0 = NaN, and a NaN FinalScore slips past the MinScore floor
// because every comparison against NaN is false.
func (c *Calculator) CalculateComponents(obs *models.Observation, now time.Time) ScoreComponents {
	const baseScore = 1.0

	cfg := c.config

	// 1. Get base type weight
	typeWeight := models.TypeBaseScore(obs.Type)

	// 2. Calculate recency decay: 0.5^(age_days / half_life_days)
	ageDays := now.Sub(time.UnixMilli(obs.CreatedAtEpoch)).Hours() / 24.0
	if ageDays < 0 {
		ageDays = 0 // Handle future timestamps gracefully
	}

	// 0.5^0 is 1 for every valid half-life, so skip the division entirely at
	// age zero; this keeps a zero (or NaN) half-life from turning the score
	// into NaN for brand-new or future-dated observations.
	// NOTE(review): a negative RecencyHalfLifeDays still makes the factor grow
	// with age; rejecting that looks like a config-validation concern - confirm.
	recencyDecay := 1.0
	if ageDays > 0 {
		recencyDecay = math.Pow(0.5, ageDays/cfg.RecencyHalfLifeDays)
	}

	// Core score = 1.0 × type_weight × recency_decay
	coreScore := baseScore * typeWeight * recencyDecay

	// 3. User feedback contribution: feedback × weight
	feedbackContrib := float64(obs.UserFeedback) * cfg.FeedbackWeight

	// 4. Concept boost contribution: sum of matching concept weights × factor
	conceptBoost := 0.0
	for _, concept := range obs.Concepts {
		if weight, ok := cfg.ConceptWeights[concept]; ok {
			conceptBoost += weight
		}
	}
	conceptContrib := conceptBoost * cfg.ConceptWeight

	// 5. Retrieval boost: log2(count + 1) × 0.1 × weight (diminishing returns)
	retrievalContrib := 0.0
	if obs.RetrievalCount > 0 {
		// log2(count + 1) gives diminishing returns: 1→1, 3→2, 7→3, 15→4, etc.
		retrievalBoost := math.Log2(float64(obs.RetrievalCount)+1) * 0.1
		retrievalContrib = retrievalBoost * cfg.RetrievalWeight
	}

	// Final score with minimum threshold
	finalScore := coreScore + feedbackContrib + conceptContrib + retrievalContrib
	if finalScore < cfg.MinScore {
		finalScore = cfg.MinScore
	}

	return ScoreComponents{
		TypeWeight:       typeWeight,
		RecencyDecay:     recencyDecay,
		CoreScore:        coreScore,
		FeedbackContrib:  feedbackContrib,
		ConceptContrib:   conceptContrib,
		RetrievalContrib: retrievalContrib,
		FinalScore:       finalScore,
		AgeDays:          ageDays,
	}
}
// ScoreComponents contains the breakdown of an importance score calculation.
// It is the value produced by Calculator.CalculateComponents; each field is
// serialized to JSON under the snake_case key given in its tag.
type ScoreComponents struct {
// TypeWeight is the multiplier returned by models.TypeBaseScore for the observation's type.
TypeWeight float64 `json:"type_weight"`
// RecencyDecay is the age-based factor 0.5^(AgeDays / half-life in days).
RecencyDecay float64 `json:"recency_decay"`
// CoreScore is 1.0 × TypeWeight × RecencyDecay.
CoreScore float64 `json:"core_score"`
// FeedbackContrib is the observation's user feedback multiplied by the configured feedback weight.
FeedbackContrib float64 `json:"feedback_contrib"`
// ConceptContrib is the summed weight of the observation's configured concepts multiplied by the concept weight factor.
ConceptContrib float64 `json:"concept_contrib"`
// RetrievalContrib is log2(retrieval count + 1) × 0.1 × the configured retrieval weight; 0 when the count is not positive.
RetrievalContrib float64 `json:"retrieval_contrib"`
// FinalScore is the sum of CoreScore and the three contributions, raised to the configured minimum score when lower.
FinalScore float64 `json:"final_score"`
// AgeDays is the observation's age in days at calculation time, clamped to 0 for future timestamps.
AgeDays float64 `json:"age_days"`
}
// BatchCalculate computes scores for multiple observations as of the instant now.
// Returns a map of observation ID to calculated score.
//
// Nil entries in observations are skipped: they carry no ID to key the result
// by, and dereferencing them would panic and abort the whole batch. When
// several observations share an ID, the score of the last one wins.
func (c *Calculator) BatchCalculate(observations []*models.Observation, now time.Time) map[int64]float64 {
	scores := make(map[int64]float64, len(observations))
	for _, obs := range observations {
		if obs == nil {
			continue
		}
		scores[obs.ID] = c.Calculate(obs, now)
	}
	return scores
}
// RecalculateThreshold reports how long to wait before an observation's
// score should be recalculated. Throttling recalculation this way avoids
// excessive work while keeping scores reasonably fresh.
//
// The value is a fixed 6 hours and does not depend on the scoring configuration.
func (c *Calculator) RecalculateThreshold() time.Duration {
	// At most one recalculation every 6 hours: a balance between
	// freshness and performance.
	const minRecalcInterval = 6 * time.Hour
	return minRecalcInterval
}
// UpdateConfig swaps in a new scoring configuration, allowing scoring
// parameters to be tuned at runtime.
// A nil config is ignored and the current configuration stays in place.
func (c *Calculator) UpdateConfig(config *models.ScoringConfig) {
	if config == nil {
		return
	}
	c.config = config
}
// GetConfig returns the scoring configuration currently in use.
// The calculator's own pointer is returned, not a copy.
func (c *Calculator) GetConfig() *models.ScoringConfig {
	current := c.config
	return current
}