mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-05 23:03:55 +00:00
1ae8035470
- [x] Add golangci.yml configuration with fieldalignment linter - [x] Implement observation graph structure with edge detection - [x] Add LEANN-inspired hybrid vector storage with hub threshold - [x] Implement graph-aware search with selective recomputation - [x] Add auto-tuner for dynamic hub threshold adjustment - [x] Add graph and vector metrics tracking and reporting - [x] Extend configuration for graph parameters - [x] Add graph rebuild background service with periodic updates - [x] Add HTTP endpoints for graph stats and vector metrics - [x] Update UI with advanced metrics sidebar panel - [x] Implement AST-aware code chunking for Go, Python, TypeScript
418 lines
10 KiB
Go
418 lines
10 KiB
Go
package graph
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
|
|
"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
const (
|
|
// SemanticSimilarityThreshold for creating semantic edges
|
|
SemanticSimilarityThreshold = 0.85
|
|
|
|
// MinFileOverlapForEdge minimum file overlap ratio to create edge
|
|
MinFileOverlapForEdge = 0.3
|
|
|
|
// MaxEdgesPerNode prevents creating too many edges
|
|
MaxEdgesPerNode = 20
|
|
)
|
|
|
|
// DetectEdges identifies relationships between observations
|
|
func DetectEdges(ctx context.Context, observations []*models.Observation) ([]Edge, error) {
|
|
if len(observations) < 2 {
|
|
return nil, nil
|
|
}
|
|
|
|
edges := make([]Edge, 0)
|
|
|
|
// Build lookup maps for efficient detection
|
|
sessionMap := buildSessionMap(observations)
|
|
conceptMap := buildConceptMap(observations)
|
|
fileMap := buildFileMap(observations)
|
|
|
|
log.Info().
|
|
Int("observations", len(observations)).
|
|
Int("sessions", len(sessionMap)).
|
|
Int("concepts", len(conceptMap)).
|
|
Msg("Starting edge detection")
|
|
|
|
// Detect temporal edges (same session)
|
|
temporalEdges := detectTemporalEdges(sessionMap)
|
|
edges = append(edges, temporalEdges...)
|
|
|
|
// Detect concept edges (shared tags)
|
|
conceptEdges := detectConceptEdges(conceptMap)
|
|
edges = append(edges, conceptEdges...)
|
|
|
|
// Detect file overlap edges
|
|
fileEdges := detectFileOverlapEdges(fileMap, observations)
|
|
edges = append(edges, fileEdges...)
|
|
|
|
// Prune excessive edges per node
|
|
edges = pruneEdges(edges, MaxEdgesPerNode)
|
|
|
|
log.Info().
|
|
Int("temporal_edges", len(temporalEdges)).
|
|
Int("concept_edges", len(conceptEdges)).
|
|
Int("file_edges", len(fileEdges)).
|
|
Int("total_edges", len(edges)).
|
|
Msg("Edge detection complete")
|
|
|
|
return edges, nil
|
|
}
|
|
|
|
// buildSessionMap groups observations by SDK session
|
|
func buildSessionMap(observations []*models.Observation) map[string][]int64 {
|
|
sessionMap := make(map[string][]int64)
|
|
|
|
for _, obs := range observations {
|
|
if obs.SDKSessionID != "" {
|
|
sessionMap[obs.SDKSessionID] = append(sessionMap[obs.SDKSessionID], obs.ID)
|
|
}
|
|
}
|
|
|
|
return sessionMap
|
|
}
|
|
|
|
// buildConceptMap groups observations by concept tags
|
|
func buildConceptMap(observations []*models.Observation) map[string][]int64 {
|
|
conceptMap := make(map[string][]int64)
|
|
|
|
for _, obs := range observations {
|
|
for _, concept := range obs.Concepts {
|
|
conceptMap[concept] = append(conceptMap[concept], obs.ID)
|
|
}
|
|
}
|
|
|
|
return conceptMap
|
|
}
|
|
|
|
// buildFileMap maps files to observations (from both FilesRead and FilesModified)
|
|
func buildFileMap(observations []*models.Observation) map[string][]int64 {
|
|
fileMap := make(map[string][]int64)
|
|
|
|
for _, obs := range observations {
|
|
// Add files from FilesRead
|
|
for _, file := range obs.FilesRead {
|
|
fileMap[file] = append(fileMap[file], obs.ID)
|
|
}
|
|
// Add files from FilesModified
|
|
for _, file := range obs.FilesModified {
|
|
fileMap[file] = append(fileMap[file], obs.ID)
|
|
}
|
|
}
|
|
|
|
return fileMap
|
|
}
|
|
|
|
// detectTemporalEdges creates edges between observations in the same session
|
|
func detectTemporalEdges(sessionMap map[string][]int64) []Edge {
|
|
edges := make([]Edge, 0)
|
|
|
|
for _, obsIDs := range sessionMap {
|
|
if len(obsIDs) < 2 {
|
|
continue
|
|
}
|
|
|
|
// Create edges between consecutive observations in session
|
|
for i := 0; i < len(obsIDs)-1; i++ {
|
|
edges = append(edges, Edge{
|
|
FromID: obsIDs[i],
|
|
ToID: obsIDs[i+1],
|
|
Relation: RelationTemporal,
|
|
Weight: 0.8, // High weight for temporal proximity
|
|
})
|
|
}
|
|
}
|
|
|
|
return edges
|
|
}
|
|
|
|
// detectConceptEdges creates edges between observations sharing concepts
|
|
func detectConceptEdges(conceptMap map[string][]int64) []Edge {
|
|
edges := make([]Edge, 0)
|
|
seen := make(map[string]bool)
|
|
|
|
for concept, obsIDs := range conceptMap {
|
|
if len(obsIDs) < 2 {
|
|
continue
|
|
}
|
|
|
|
// Create edges between all observations sharing this concept
|
|
for i := 0; i < len(obsIDs); i++ {
|
|
for j := i + 1; j < len(obsIDs); j++ {
|
|
// Use sorted pair as key to avoid duplicates
|
|
pairKey := edgeKey(obsIDs[i], obsIDs[j])
|
|
if seen[pairKey] {
|
|
continue
|
|
}
|
|
seen[pairKey] = true
|
|
|
|
// Weight based on concept specificity (longer = more specific)
|
|
weight := float32(0.5 + 0.3*math.Min(1.0, float64(len(concept))/20.0))
|
|
|
|
edges = append(edges, Edge{
|
|
FromID: obsIDs[i],
|
|
ToID: obsIDs[j],
|
|
Relation: RelationConcept,
|
|
Weight: weight,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
return edges
|
|
}
|
|
|
|
// detectFileOverlapEdges creates edges based on file references
|
|
func detectFileOverlapEdges(fileMap map[string][]int64, observations []*models.Observation) []Edge {
|
|
edges := make([]Edge, 0)
|
|
seen := make(map[string]bool)
|
|
|
|
// Build observation ID to observation map for quick lookup
|
|
obsMap := make(map[int64]*models.Observation)
|
|
for _, obs := range observations {
|
|
obsMap[obs.ID] = obs
|
|
}
|
|
|
|
for _, obsIDs := range fileMap {
|
|
if len(obsIDs) < 2 {
|
|
continue
|
|
}
|
|
|
|
// Create edges between observations referencing same files
|
|
for i := 0; i < len(obsIDs); i++ {
|
|
for j := i + 1; j < len(obsIDs); j++ {
|
|
pairKey := edgeKey(obsIDs[i], obsIDs[j])
|
|
if seen[pairKey] {
|
|
continue
|
|
}
|
|
seen[pairKey] = true
|
|
|
|
// Calculate file overlap ratio
|
|
obs1, ok1 := obsMap[obsIDs[i]]
|
|
obs2, ok2 := obsMap[obsIDs[j]]
|
|
|
|
if !ok1 || !ok2 {
|
|
continue
|
|
}
|
|
|
|
// Merge FilesRead and FilesModified for both observations
|
|
files1 := append([]string{}, obs1.FilesRead...)
|
|
files1 = append(files1, obs1.FilesModified...)
|
|
files2 := append([]string{}, obs2.FilesRead...)
|
|
files2 = append(files2, obs2.FilesModified...)
|
|
|
|
overlap := calculateFileOverlap(files1, files2)
|
|
if overlap < MinFileOverlapForEdge {
|
|
continue
|
|
}
|
|
|
|
edges = append(edges, Edge{
|
|
FromID: obsIDs[i],
|
|
ToID: obsIDs[j],
|
|
Relation: RelationFileOverlap,
|
|
Weight: overlap,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
return edges
|
|
}
|
|
|
|
// calculateFileOverlap computes Jaccard similarity of file sets
|
|
func calculateFileOverlap(files1, files2 []string) float32 {
|
|
if len(files1) == 0 || len(files2) == 0 {
|
|
return 0.0
|
|
}
|
|
|
|
// Convert to sets
|
|
set1 := make(map[string]bool)
|
|
for _, f := range files1 {
|
|
set1[f] = true
|
|
}
|
|
|
|
set2 := make(map[string]bool)
|
|
for _, f := range files2 {
|
|
set2[f] = true
|
|
}
|
|
|
|
// Count intersection
|
|
intersection := 0
|
|
for f := range set1 {
|
|
if set2[f] {
|
|
intersection++
|
|
}
|
|
}
|
|
|
|
// Jaccard similarity = intersection / union
|
|
union := len(set1) + len(set2) - intersection
|
|
if union == 0 {
|
|
return 0.0
|
|
}
|
|
|
|
return float32(intersection) / float32(union)
|
|
}
|
|
|
|
// pruneEdges limits edges per node to prevent graph explosion
|
|
func pruneEdges(edges []Edge, maxPerNode int) []Edge {
|
|
if maxPerNode <= 0 {
|
|
return edges
|
|
}
|
|
|
|
// Count edges per node
|
|
outEdges := make(map[int64][]Edge)
|
|
inEdges := make(map[int64][]Edge)
|
|
|
|
for _, edge := range edges {
|
|
outEdges[edge.FromID] = append(outEdges[edge.FromID], edge)
|
|
inEdges[edge.ToID] = append(inEdges[edge.ToID], edge)
|
|
}
|
|
|
|
// Prune low-weight edges if node has too many
|
|
pruned := make([]Edge, 0, len(edges))
|
|
processed := make(map[string]bool)
|
|
|
|
for _, edge := range edges {
|
|
pairKey := edgeKey(edge.FromID, edge.ToID)
|
|
if processed[pairKey] {
|
|
continue
|
|
}
|
|
processed[pairKey] = true
|
|
|
|
// Check if either node has too many edges
|
|
fromCount := len(outEdges[edge.FromID])
|
|
toCount := len(inEdges[edge.ToID])
|
|
|
|
if fromCount <= maxPerNode && toCount <= maxPerNode {
|
|
pruned = append(pruned, edge)
|
|
continue
|
|
}
|
|
|
|
// Keep edge if it's high-weight (top edges for this node)
|
|
if shouldKeepEdge(edge, outEdges[edge.FromID], maxPerNode) {
|
|
pruned = append(pruned, edge)
|
|
}
|
|
}
|
|
|
|
if len(pruned) < len(edges) {
|
|
log.Debug().
|
|
Int("original", len(edges)).
|
|
Int("pruned", len(pruned)).
|
|
Int("removed", len(edges)-len(pruned)).
|
|
Msg("Pruned excessive edges")
|
|
}
|
|
|
|
return pruned
|
|
}
|
|
|
|
// shouldKeepEdge determines if edge should be kept during pruning
|
|
func shouldKeepEdge(edge Edge, nodeEdges []Edge, maxPerNode int) bool {
|
|
// Sort node's edges by weight descending
|
|
sortedEdges := make([]Edge, len(nodeEdges))
|
|
copy(sortedEdges, nodeEdges)
|
|
|
|
sortEdgesByWeight(sortedEdges)
|
|
|
|
// Keep edge if it's in top maxPerNode
|
|
for i := 0; i < maxPerNode && i < len(sortedEdges); i++ {
|
|
if sortedEdges[i].FromID == edge.FromID && sortedEdges[i].ToID == edge.ToID {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// sortEdgesByWeight sorts edges by weight descending
|
|
func sortEdgesByWeight(edges []Edge) {
|
|
// Simple bubble sort (edges are typically small per node)
|
|
n := len(edges)
|
|
for i := 0; i < n-1; i++ {
|
|
for j := 0; j < n-i-1; j++ {
|
|
if edges[j].Weight < edges[j+1].Weight {
|
|
edges[j], edges[j+1] = edges[j+1], edges[j]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// edgeKey creates a unique key for an edge pair (sorted)
|
|
func edgeKey(id1, id2 int64) string {
|
|
if id1 < id2 {
|
|
return fmt.Sprintf("%d-%d", id1, id2)
|
|
}
|
|
return fmt.Sprintf("%d-%d", id2, id1)
|
|
}
|
|
|
|
// DetectSemanticEdges creates edges based on semantic similarity
|
|
// This requires embeddings and is called separately when available
|
|
func DetectSemanticEdges(ctx context.Context, observations []*models.Observation, embeddings map[int64][]float32) []Edge {
|
|
edges := make([]Edge, 0)
|
|
seen := make(map[string]bool)
|
|
|
|
// Compare all pairs (expensive, but necessary for semantic similarity)
|
|
for i := 0; i < len(observations); i++ {
|
|
emb1, ok1 := embeddings[observations[i].ID]
|
|
if !ok1 {
|
|
continue
|
|
}
|
|
|
|
for j := i + 1; j < len(observations); j++ {
|
|
emb2, ok2 := embeddings[observations[j].ID]
|
|
if !ok2 {
|
|
continue
|
|
}
|
|
|
|
similarity := cosineSimilarity(emb1, emb2)
|
|
if similarity < SemanticSimilarityThreshold {
|
|
continue
|
|
}
|
|
|
|
pairKey := edgeKey(observations[i].ID, observations[j].ID)
|
|
if seen[pairKey] {
|
|
continue
|
|
}
|
|
seen[pairKey] = true
|
|
|
|
edges = append(edges, Edge{
|
|
FromID: observations[i].ID,
|
|
ToID: observations[j].ID,
|
|
Relation: RelationSemantic,
|
|
Weight: similarity,
|
|
})
|
|
}
|
|
}
|
|
|
|
log.Info().
|
|
Int("semantic_edges", len(edges)).
|
|
Float32("threshold", SemanticSimilarityThreshold).
|
|
Msg("Detected semantic edges")
|
|
|
|
return edges
|
|
}
|
|
|
|
// cosineSimilarity computes cosine similarity between two vectors
|
|
func cosineSimilarity(a, b []float32) float32 {
|
|
if len(a) != len(b) {
|
|
return 0.0
|
|
}
|
|
|
|
var dotProduct, normA, normB float32
|
|
for i := range a {
|
|
dotProduct += a[i] * b[i]
|
|
normA += a[i] * a[i]
|
|
normB += b[i] * b[i]
|
|
}
|
|
|
|
if normA == 0 || normB == 0 {
|
|
return 0.0
|
|
}
|
|
|
|
return dotProduct / float32(math.Sqrt(float64(normA))*math.Sqrt(float64(normB)))
|
|
}
|