mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-13 02:06:24 +00:00
Release dec 2025 (#15)
* Resolves issue #13 - Switched model to bge-small-en-v1.5 - Added lazy re-embedding - Added model version tracking per vector - Added conversion of vectors to the new model * Add LFS support to the workflow. * Implements importance scoring with decay + voting #6 * Resolves issue #5 by marking observations as superseded and scheduled for deletion * Implement pattern detection #7 * Improve injections and observations accuracy - Session start: Recent observations for project context (recency-based) - User prompt: Semantically relevant observations (similarity-based with threshold) * Added two-stage retrieval with bi- and cross-encoder #8 * Implement query expansion and reformulation #9 * Knowledge graph and relationships (resolves #4) - File Overlap Detection: Detects relationships when observations modify/read the same files - Concept Overlap Detection: Detects relationships based on shared semantic concepts - Type Progression Detection: Infers relationships from natural observation type progressions (e.g., discovery → bugfix = "fixes") - Temporal Proximity Detection: Detects relationships between observations in the same session within 5 minutes - Narrative Mention Detection: Detects explicit relationship language in narratives (e.g., "fixes", "depends on", "supersedes") * Add visualisation of the relations to the dashboard. * fixup! Add visualisation of the relations to the dashboard. * Update documentation with new settings and screenshots.
This commit is contained in:
@@ -0,0 +1,398 @@
|
||||
// Package models contains domain models for claude-mnemonic.
|
||||
package models
|
||||
|
||||
import (
	"database/sql"
	"database/sql/driver"
	"encoding/json"
	"fmt"
	"time"
)
|
||||
|
||||
// PatternType names the category that a detected pattern belongs to.
type PatternType string

// Known pattern categories.
const (
	// PatternTypeBug marks recurring bug patterns (e.g., "nil handling oversight").
	PatternTypeBug = PatternType("bug")

	// PatternTypeRefactor marks recurring refactoring approaches (e.g., "interface extraction").
	PatternTypeRefactor = PatternType("refactor")

	// PatternTypeArchitecture marks consistent architectural patterns.
	PatternTypeArchitecture = PatternType("architecture")

	// PatternTypeAntiPattern marks identified anti-patterns to avoid.
	PatternTypeAntiPattern = PatternType("anti-pattern")

	// PatternTypeBestPractice marks best practices that work consistently.
	PatternTypeBestPractice = PatternType("best-practice")
)
|
||||
|
||||
// PatternStatus describes where a pattern is in its lifecycle.
type PatternStatus string

// Known lifecycle states.
const (
	// PatternStatusActive: the pattern is actively being tracked and can be referenced.
	PatternStatusActive = PatternStatus("active")

	// PatternStatusDeprecated: the pattern has been superseded or is no longer relevant.
	PatternStatusDeprecated = PatternStatus("deprecated")

	// PatternStatusMerged: the pattern was merged into another pattern.
	PatternStatusMerged = PatternStatus("merged")
)
|
||||
|
||||
// Pattern represents a recurring pattern detected across observations.
|
||||
// This enables Claude to reference historical insights: "I've encountered this pattern 12 times."
|
||||
type Pattern struct {
|
||||
ID int64 `db:"id" json:"id"`
|
||||
Name string `db:"name" json:"name"` // e.g., "State Management Anti-Pattern"
|
||||
Type PatternType `db:"type" json:"type"` // bug, refactor, architecture, etc.
|
||||
Description sql.NullString `db:"description" json:"description"` // Detailed description
|
||||
Signature JSONStringArray `db:"signature" json:"signature"` // Keyword clusters for detection
|
||||
Recommendation sql.NullString `db:"recommendation" json:"recommendation"` // What works for this pattern
|
||||
Frequency int `db:"frequency" json:"frequency"` // How many times encountered
|
||||
Projects JSONStringArray `db:"projects" json:"projects"` // Projects where this pattern was seen
|
||||
ObservationIDs JSONInt64Array `db:"observation_ids" json:"observation_ids"` // Source observation IDs
|
||||
Status PatternStatus `db:"status" json:"status"` // active, deprecated, merged
|
||||
MergedIntoID sql.NullInt64 `db:"merged_into_id" json:"merged_into_id,omitempty"`
|
||||
Confidence float64 `db:"confidence" json:"confidence"` // Detection confidence (0.0-1.0)
|
||||
LastSeenAt string `db:"last_seen_at" json:"last_seen_at"` // Last time pattern was detected
|
||||
LastSeenEpoch int64 `db:"last_seen_at_epoch" json:"last_seen_at_epoch"`
|
||||
CreatedAt string `db:"created_at" json:"created_at"`
|
||||
CreatedAtEpoch int64 `db:"created_at_epoch" json:"created_at_epoch"`
|
||||
}
|
||||
|
||||
// JSONInt64Array is a custom type for handling JSON int64 arrays in SQLite.
// It is stored as a JSON-encoded array in a single column.
type JSONInt64Array []int64

// Scan implements sql.Scanner for JSONInt64Array.
//
// A nil source, an empty string, or an empty byte slice resets the receiver
// to nil. string and []byte sources are decoded as JSON. Any other source
// type is rejected with an error rather than being silently ignored, so a
// mismatched column cannot leave a stale value in the destination.
func (j *JSONInt64Array) Scan(src any) error {
	var data []byte
	switch v := src.(type) {
	case nil:
		*j = nil
		return nil
	case string:
		data = []byte(v)
	case []byte:
		data = v
	default:
		return fmt.Errorf("JSONInt64Array: unsupported Scan source type %T", src)
	}

	if len(data) == 0 {
		*j = nil
		return nil
	}

	return json.Unmarshal(data, j)
}

// Value implements driver.Valuer for JSONInt64Array.
//
// A nil slice yields a nil driver value; any other slice (including an empty,
// non-nil one) yields its JSON encoding as a byte slice.
func (j JSONInt64Array) Value() (driver.Value, error) {
	if j == nil {
		return nil, nil
	}
	return json.Marshal(j)
}
|
||||
|
||||
// PatternJSON is a JSON-friendly representation of Pattern.
|
||||
type PatternJSON struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type PatternType `json:"type"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Signature []string `json:"signature,omitempty"`
|
||||
Recommendation string `json:"recommendation,omitempty"`
|
||||
Frequency int `json:"frequency"`
|
||||
Projects []string `json:"projects,omitempty"`
|
||||
ObservationIDs []int64 `json:"observation_ids,omitempty"`
|
||||
Status PatternStatus `json:"status"`
|
||||
MergedIntoID int64 `json:"merged_into_id,omitempty"`
|
||||
Confidence float64 `json:"confidence"`
|
||||
LastSeenAt string `json:"last_seen_at"`
|
||||
LastSeenEpoch int64 `json:"last_seen_at_epoch"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
CreatedAtEpoch int64 `json:"created_at_epoch"`
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler for Pattern.
|
||||
func (p *Pattern) MarshalJSON() ([]byte, error) {
|
||||
j := PatternJSON{
|
||||
ID: p.ID,
|
||||
Name: p.Name,
|
||||
Type: p.Type,
|
||||
Signature: p.Signature,
|
||||
Frequency: p.Frequency,
|
||||
Projects: p.Projects,
|
||||
ObservationIDs: p.ObservationIDs,
|
||||
Status: p.Status,
|
||||
Confidence: p.Confidence,
|
||||
LastSeenAt: p.LastSeenAt,
|
||||
LastSeenEpoch: p.LastSeenEpoch,
|
||||
CreatedAt: p.CreatedAt,
|
||||
CreatedAtEpoch: p.CreatedAtEpoch,
|
||||
}
|
||||
if p.Description.Valid {
|
||||
j.Description = p.Description.String
|
||||
}
|
||||
if p.Recommendation.Valid {
|
||||
j.Recommendation = p.Recommendation.String
|
||||
}
|
||||
if p.MergedIntoID.Valid {
|
||||
j.MergedIntoID = p.MergedIntoID.Int64
|
||||
}
|
||||
return json.Marshal(j)
|
||||
}
|
||||
|
||||
// NewPattern creates a new pattern from detected data.
|
||||
func NewPattern(name string, patternType PatternType, description string, signature []string, project string, observationID int64) *Pattern {
|
||||
now := time.Now()
|
||||
return &Pattern{
|
||||
Name: name,
|
||||
Type: patternType,
|
||||
Description: sql.NullString{String: description, Valid: description != ""},
|
||||
Signature: signature,
|
||||
Frequency: 1,
|
||||
Projects: []string{project},
|
||||
ObservationIDs: []int64{observationID},
|
||||
Status: PatternStatusActive,
|
||||
Confidence: 0.5, // Initial confidence
|
||||
LastSeenAt: now.Format(time.RFC3339),
|
||||
LastSeenEpoch: now.UnixMilli(),
|
||||
CreatedAt: now.Format(time.RFC3339),
|
||||
CreatedAtEpoch: now.UnixMilli(),
|
||||
}
|
||||
}
|
||||
|
||||
// AddOccurrence records a new occurrence of this pattern.
|
||||
func (p *Pattern) AddOccurrence(project string, observationID int64) {
|
||||
p.Frequency++
|
||||
|
||||
// Add project if not already tracked
|
||||
found := false
|
||||
for _, proj := range p.Projects {
|
||||
if proj == project {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
p.Projects = append(p.Projects, project)
|
||||
}
|
||||
|
||||
// Add observation ID if not already tracked
|
||||
for _, id := range p.ObservationIDs {
|
||||
if id == observationID {
|
||||
return
|
||||
}
|
||||
}
|
||||
p.ObservationIDs = append(p.ObservationIDs, observationID)
|
||||
|
||||
// Update confidence based on frequency and cross-project occurrence
|
||||
p.updateConfidence()
|
||||
|
||||
// Update last seen timestamp
|
||||
now := time.Now()
|
||||
p.LastSeenAt = now.Format(time.RFC3339)
|
||||
p.LastSeenEpoch = now.UnixMilli()
|
||||
}
|
||||
|
||||
// updateConfidence adjusts confidence based on frequency and cross-project validation.
|
||||
func (p *Pattern) updateConfidence() {
|
||||
// Base confidence from frequency (logarithmic scaling)
|
||||
freqConfidence := 0.3 + (0.4 * (float64(min(p.Frequency, 10)) / 10.0))
|
||||
|
||||
// Cross-project bonus: patterns seen across multiple projects are more reliable
|
||||
projectBonus := 0.0
|
||||
if len(p.Projects) >= 2 {
|
||||
projectBonus = 0.1
|
||||
}
|
||||
if len(p.Projects) >= 5 {
|
||||
projectBonus = 0.2
|
||||
}
|
||||
|
||||
p.Confidence = min(1.0, freqConfidence+projectBonus)
|
||||
}
|
||||
|
||||
// PatternMatch describes how an observation matched a potential pattern.
type PatternMatch struct {
	// PatternID identifies the matched pattern.
	PatternID int64 `json:"pattern_id"`

	// Score is the match score (0.0-1.0).
	Score float64 `json:"score"`

	// MatchedOn names what triggered the match (concept, keyword, type, etc.).
	MatchedOn string `json:"matched_on"`

	// IsNew reports whether this match would create a new pattern.
	IsNew bool `json:"is_new"`

	// SuggestedName is an optional name suggestion; omitted from JSON when empty.
	SuggestedName string `json:"suggested_name,omitempty"`
}
|
||||
|
||||
// PatternSignatureKeywords are common keywords used in pattern detection.
|
||||
var PatternSignatureKeywords = map[PatternType][]string{
|
||||
PatternTypeBug: {
|
||||
"nil", "null", "undefined", "panic", "crash", "error handling",
|
||||
"race condition", "deadlock", "memory leak", "overflow",
|
||||
"off-by-one", "boundary", "timeout", "concurrency",
|
||||
},
|
||||
PatternTypeRefactor: {
|
||||
"extract", "inline", "rename", "move", "split", "merge",
|
||||
"interface", "abstraction", "decouple", "simplify",
|
||||
"consolidate", "modularize", "encapsulate",
|
||||
},
|
||||
PatternTypeArchitecture: {
|
||||
"layer", "service", "repository", "controller", "handler",
|
||||
"middleware", "dependency injection", "factory", "singleton",
|
||||
"observer", "strategy", "adapter", "facade", "builder",
|
||||
},
|
||||
PatternTypeAntiPattern: {
|
||||
"god class", "spaghetti", "copy paste", "magic number",
|
||||
"hardcoded", "circular dependency", "premature optimization",
|
||||
"over-engineering", "feature envy", "data clump",
|
||||
},
|
||||
PatternTypeBestPractice: {
|
||||
"test", "validation", "logging", "monitoring", "documentation",
|
||||
"error handling", "retry", "timeout", "circuit breaker",
|
||||
"graceful shutdown", "health check", "metrics",
|
||||
},
|
||||
}
|
||||
|
||||
// DetectPatternType analyzes concepts and content to determine pattern type.
|
||||
func DetectPatternType(concepts []string, title, narrative string) PatternType {
|
||||
// Check concepts first
|
||||
for _, concept := range concepts {
|
||||
switch concept {
|
||||
case "anti-pattern":
|
||||
return PatternTypeAntiPattern
|
||||
case "best-practice":
|
||||
return PatternTypeBestPractice
|
||||
case "architecture":
|
||||
return PatternTypeArchitecture
|
||||
case "refactor":
|
||||
return PatternTypeRefactor
|
||||
}
|
||||
}
|
||||
|
||||
// Check for bug-related patterns in content
|
||||
content := title + " " + narrative
|
||||
for _, keyword := range PatternSignatureKeywords[PatternTypeBug] {
|
||||
if containsIgnoreCase(content, keyword) {
|
||||
return PatternTypeBug
|
||||
}
|
||||
}
|
||||
|
||||
// Default to refactor for other patterns
|
||||
return PatternTypeRefactor
|
||||
}
|
||||
|
||||
// containsIgnoreCase checks if text contains substr (case-insensitive).
|
||||
func containsIgnoreCase(text, substr string) bool {
|
||||
textLower := toLower(text)
|
||||
substrLower := toLower(substr)
|
||||
return contains(textLower, substrLower)
|
||||
}
|
||||
|
||||
// Simple implementations to avoid strings package dependency in this file.

// toLower returns s with the ASCII letters 'A'-'Z' converted to lower case.
// It works byte by byte, so all other bytes (including non-ASCII text) are
// copied through unchanged.
func toLower(s string) string {
	out := []byte(s)
	for idx, ch := range out {
		if ch >= 'A' && ch <= 'Z' {
			out[idx] = ch + ('a' - 'A')
		}
	}
	return string(out)
}
|
||||
|
||||
// contains reports whether substr occurs anywhere in s (byte-wise,
// case-sensitive). An empty substr always matches. It is hand-rolled so this
// file does not need the strings package.
func contains(s, substr string) bool {
	width := len(substr)
	// Slide a window of len(substr) bytes across s; the loop body never runs
	// when substr is longer than s.
	for start := 0; start+width <= len(s); start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}
|
||||
|
||||
// ExtractSignature creates a signature from observation content.
|
||||
func ExtractSignature(concepts []string, title, narrative string) []string {
|
||||
var signature []string
|
||||
|
||||
// Add all concepts
|
||||
signature = append(signature, concepts...)
|
||||
|
||||
// Extract key terms from title (simple word extraction)
|
||||
for _, word := range splitWords(title) {
|
||||
if len(word) > 3 && isSignificantWord(word) {
|
||||
signature = append(signature, toLower(word))
|
||||
}
|
||||
}
|
||||
|
||||
return uniqueStrings(signature)
|
||||
}
|
||||
|
||||
// splitWords splits s into words on the separators ' ', '-', '_', '.' and ','.
// Runs of separators produce no empty words, and the result is nil when s
// contains no words.
//
// Words are returned as sub-slices of s instead of being built up one rune at
// a time, which avoids quadratic string concatenation. Scanning byte by byte
// is safe because every separator is ASCII and never appears inside a
// multi-byte UTF-8 sequence. Bytes that are not valid UTF-8 are kept as-is.
func splitWords(s string) []string {
	var words []string
	start := -1 // byte offset where the current word began; -1 between words
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '-', '_', '.', ',':
			if start >= 0 {
				words = append(words, s[start:i])
				start = -1
			}
		default:
			if start < 0 {
				start = i
			}
		}
	}
	if start >= 0 {
		words = append(words, s[start:])
	}
	return words
}
|
||||
|
||||
// isSignificantWord filters out common stop words.
|
||||
func isSignificantWord(word string) bool {
|
||||
stopWords := map[string]bool{
|
||||
"the": true, "and": true, "for": true, "with": true, "that": true,
|
||||
"this": true, "from": true, "have": true, "not": true, "are": true,
|
||||
"was": true, "but": true, "all": true, "can": true, "had": true,
|
||||
"were": true, "been": true, "will": true, "when": true, "what": true,
|
||||
}
|
||||
return !stopWords[toLower(word)]
|
||||
}
|
||||
|
||||
// uniqueStrings returns the elements of s with duplicates removed, keeping
// the first occurrence of each value in its original order. Comparison is
// exact (case-sensitive). The result is nil when s is empty.
func uniqueStrings(s []string) []string {
	var deduped []string
	known := make(map[string]struct{})
	for _, item := range s {
		if _, dup := known[item]; dup {
			continue
		}
		known[item] = struct{}{}
		deduped = append(deduped, item)
	}
	return deduped
}
|
||||
|
||||
// CalculateMatchScore computes similarity between two signatures.
|
||||
func CalculateMatchScore(sig1, sig2 []string) float64 {
|
||||
if len(sig1) == 0 || len(sig2) == 0 {
|
||||
return 0.0
|
||||
}
|
||||
|
||||
set1 := make(map[string]bool)
|
||||
for _, s := range sig1 {
|
||||
set1[toLower(s)] = true
|
||||
}
|
||||
|
||||
matches := 0
|
||||
for _, s := range sig2 {
|
||||
if set1[toLower(s)] {
|
||||
matches++
|
||||
}
|
||||
}
|
||||
|
||||
// Jaccard similarity
|
||||
unionSize := len(sig1) + len(sig2) - matches
|
||||
if unionSize == 0 {
|
||||
return 0.0
|
||||
}
|
||||
return float64(matches) / float64(unionSize)
|
||||
}
|
||||
Reference in New Issue
Block a user