mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-05 23:03:55 +00:00
f79782a008
* Resolves issue #13 - Switched model to bge-small-en-v1.5 - Added lazy re-embedding - Added model version tracking per vector - Added conversion of vectors to the new model * Add lfs support to the workflow. * Implements importance scoring with decay + voting #6 * Resolves issue #5 by marking observations as superseded and scheduled for deletion * Implement pattern detection #7 * Improve injections and observations accuracy - Session start: Recent observations for project context (recency-based) - User prompt: Semantically relevant observations (similarity-based with threshold) * Added two stage retrieval with bi and cross encoder #8 * Implement query expansion and reformulation #9 * Knowledge graph and relationships ( resolves #4 ) - File Overlap Detection: Detects relationships when observations modify/read the same files - Concept Overlap Detection: Detects relationships based on shared semantic concepts - Type Progression Detection: Infers relationships from natural observation type progressions (e.g., discovery → bugfix = "fixes") - Temporal Proximity Detection: Detects relationships between observations in the same session within 5 minutes - Narrative Mention Detection: Detects explicit relationship language in narratives (e.g., "fixes", "depends on", "supersedes") * Add visualisation of the relations to the dashboard. * fixup! Add visualisation of the relations to the dashboard. * Update documentation with new settings and screenshots.
169 lines
5.6 KiB
Go
169 lines
5.6 KiB
Go
// Package scoring provides importance score calculation for observations.
|
||
package scoring
|
||
|
||
import (
|
||
"math"
|
||
"time"
|
||
|
||
"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
|
||
)
|
||
|
||
// Calculator computes importance scores for observations.
|
||
type Calculator struct {
|
||
config *models.ScoringConfig
|
||
}
|
||
|
||
// NewCalculator creates a new scoring calculator.
|
||
// If config is nil, uses the default configuration.
|
||
func NewCalculator(config *models.ScoringConfig) *Calculator {
|
||
if config == nil {
|
||
config = models.DefaultScoringConfig()
|
||
}
|
||
return &Calculator{config: config}
|
||
}
|
||
|
||
// Calculate computes the importance score for an observation at the given time.
|
||
//
|
||
// The scoring formula:
|
||
//
|
||
// FinalScore = (BaseScore × TypeWeight × RecencyDecay) + FeedbackContrib + ConceptContrib + RetrievalContrib
|
||
//
|
||
// Where:
|
||
// - BaseScore = 1.0
|
||
// - TypeWeight = observation type multiplier (e.g., bugfix=1.3, change=0.9)
|
||
// - RecencyDecay = 0.5^(age_days / half_life_days) - halves every 7 days by default
|
||
// - FeedbackContrib = user_feedback × feedback_weight
|
||
// - ConceptContrib = sum(concept_weights) × concept_weight_factor
|
||
// - RetrievalContrib = log2(retrieval_count + 1) × 0.1 × retrieval_weight
|
||
func (c *Calculator) Calculate(obs *models.Observation, now time.Time) float64 {
|
||
// 1. Get base type weight
|
||
typeWeight := models.TypeBaseScore(obs.Type)
|
||
|
||
// 2. Calculate recency decay: 0.5^(age_days / half_life_days)
|
||
ageDays := now.Sub(time.UnixMilli(obs.CreatedAtEpoch)).Hours() / 24.0
|
||
if ageDays < 0 {
|
||
ageDays = 0 // Handle future timestamps gracefully
|
||
}
|
||
recencyDecay := math.Pow(0.5, ageDays/c.config.RecencyHalfLifeDays)
|
||
|
||
// Core score = 1.0 × type_weight × recency_decay
|
||
coreScore := 1.0 * typeWeight * recencyDecay
|
||
|
||
// 3. User feedback contribution: feedback × weight
|
||
feedbackContrib := float64(obs.UserFeedback) * c.config.FeedbackWeight
|
||
|
||
// 4. Concept boost contribution: sum of matching concept weights × factor
|
||
conceptBoost := 0.0
|
||
for _, concept := range obs.Concepts {
|
||
if weight, ok := c.config.ConceptWeights[concept]; ok {
|
||
conceptBoost += weight
|
||
}
|
||
}
|
||
conceptContrib := conceptBoost * c.config.ConceptWeight
|
||
|
||
// 5. Retrieval boost: log2(count + 1) × 0.1 × weight (diminishing returns)
|
||
retrievalContrib := 0.0
|
||
if obs.RetrievalCount > 0 {
|
||
// log2(count + 1) gives diminishing returns: 1→1, 3→2, 7→3, 15→4, etc.
|
||
retrievalBoost := math.Log2(float64(obs.RetrievalCount)+1) * 0.1
|
||
retrievalContrib = retrievalBoost * c.config.RetrievalWeight
|
||
}
|
||
|
||
// Final score with minimum threshold
|
||
finalScore := coreScore + feedbackContrib + conceptContrib + retrievalContrib
|
||
if finalScore < c.config.MinScore {
|
||
finalScore = c.config.MinScore
|
||
}
|
||
|
||
return finalScore
|
||
}
|
||
|
||
// CalculateComponents returns the individual components of the importance score.
|
||
// Useful for debugging and explaining scores to users.
|
||
func (c *Calculator) CalculateComponents(obs *models.Observation, now time.Time) ScoreComponents {
|
||
typeWeight := models.TypeBaseScore(obs.Type)
|
||
|
||
ageDays := now.Sub(time.UnixMilli(obs.CreatedAtEpoch)).Hours() / 24.0
|
||
if ageDays < 0 {
|
||
ageDays = 0
|
||
}
|
||
recencyDecay := math.Pow(0.5, ageDays/c.config.RecencyHalfLifeDays)
|
||
|
||
coreScore := 1.0 * typeWeight * recencyDecay
|
||
feedbackContrib := float64(obs.UserFeedback) * c.config.FeedbackWeight
|
||
|
||
conceptBoost := 0.0
|
||
for _, concept := range obs.Concepts {
|
||
if weight, ok := c.config.ConceptWeights[concept]; ok {
|
||
conceptBoost += weight
|
||
}
|
||
}
|
||
conceptContrib := conceptBoost * c.config.ConceptWeight
|
||
|
||
retrievalContrib := 0.0
|
||
if obs.RetrievalCount > 0 {
|
||
retrievalBoost := math.Log2(float64(obs.RetrievalCount)+1) * 0.1
|
||
retrievalContrib = retrievalBoost * c.config.RetrievalWeight
|
||
}
|
||
|
||
finalScore := coreScore + feedbackContrib + conceptContrib + retrievalContrib
|
||
if finalScore < c.config.MinScore {
|
||
finalScore = c.config.MinScore
|
||
}
|
||
|
||
return ScoreComponents{
|
||
TypeWeight: typeWeight,
|
||
RecencyDecay: recencyDecay,
|
||
CoreScore: coreScore,
|
||
FeedbackContrib: feedbackContrib,
|
||
ConceptContrib: conceptContrib,
|
||
RetrievalContrib: retrievalContrib,
|
||
FinalScore: finalScore,
|
||
AgeDays: ageDays,
|
||
}
|
||
}
|
||
|
||
// ScoreComponents is the itemised result of one importance-score calculation.
// Each field serialises to JSON under the snake_case name given in its tag.
type ScoreComponents struct {
	// TypeWeight is the multiplier applied for the observation's type.
	TypeWeight float64 `json:"type_weight"`
	// RecencyDecay is the age-based decay factor applied to the core score.
	RecencyDecay float64 `json:"recency_decay"`
	// CoreScore is the base score after the type weight and recency decay.
	CoreScore float64 `json:"core_score"`
	// FeedbackContrib is the part of the score that comes from user feedback.
	FeedbackContrib float64 `json:"feedback_contrib"`
	// ConceptContrib is the part of the score that comes from concept weights.
	ConceptContrib float64 `json:"concept_contrib"`
	// RetrievalContrib is the part of the score that comes from retrieval count.
	RetrievalContrib float64 `json:"retrieval_contrib"`
	// FinalScore is the overall score after the minimum-score floor is applied.
	FinalScore float64 `json:"final_score"`
	// AgeDays is the observation's age in days used for the calculation.
	AgeDays float64 `json:"age_days"`
}
|
||
|
||
// BatchCalculate computes scores for multiple observations.
|
||
// Returns a map of observation ID to calculated score.
|
||
func (c *Calculator) BatchCalculate(observations []*models.Observation, now time.Time) map[int64]float64 {
|
||
scores := make(map[int64]float64, len(observations))
|
||
for _, obs := range observations {
|
||
scores[obs.ID] = c.Calculate(obs, now)
|
||
}
|
||
return scores
|
||
}
|
||
|
||
// RecalculateThreshold returns the minimum duration before an observation
|
||
// should have its score recalculated. This prevents excessive recalculation
|
||
// while ensuring scores stay reasonably fresh.
|
||
func (c *Calculator) RecalculateThreshold() time.Duration {
|
||
// Recalculate at most every 6 hours
|
||
// This balances freshness with performance
|
||
return 6 * time.Hour
|
||
}
|
||
|
||
// UpdateConfig updates the calculator's scoring configuration.
|
||
// This allows runtime tuning of scoring parameters.
|
||
func (c *Calculator) UpdateConfig(config *models.ScoringConfig) {
|
||
if config != nil {
|
||
c.config = config
|
||
}
|
||
}
|
||
|
||
// GetConfig returns the current scoring configuration.
|
||
func (c *Calculator) GetConfig() *models.ScoringConfig {
|
||
return c.config
|
||
}
|