mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-08 23:39:40 +00:00
Release dec 2025 (#15)
* Resolves issue #13 - Switched model to bge-small-en-v1.5 - Added lazy re-embedding - Added model version tracking per vector - Added conversion of vectors to the new model * Add lfs support to the workflow. * Implements importance scoring with decay + voting #6 * Resolves issue #5 by marking observations as superseded and scheduled for deletion * Implement pattern detection #7 * Improve injections and observations accuracy - Session start: Recent observations for project context (recency-based) - User prompt: Semantically relevant observations (similarity-based with threshold) * Added two stage retrieval with bi and cross encoder #8 * Implement query expansion and reformulation #9 * Knowledge graph and relationships ( resolves #4 ) - File Overlap Detection: Detects relationships when observations modify/read the same files - Concept Overlap Detection: Detects relationships based on shared semantic concepts - Type Progression Detection: Infers relationships from natural observation type progressions (e.g., discovery → bugfix = "fixes") - Temporal Proximity Detection: Detects relationships between observations in the same session within 5 minutes - Narrative Mention Detection: Detects explicit relationship language in narratives (e.g., "fixes", "depends on", "supersedes") * Add visualisation of the relations to the dashboard. * fixup! Add visualisation of the relations to the dashboard. * Update documentation with new settings and screenshots.
This commit is contained in:
@@ -60,12 +60,15 @@ func (c *Client) AddDocuments(ctx context.Context, docs []Document) error {
|
||||
return fmt.Errorf("generate embeddings: %w", err)
|
||||
}
|
||||
|
||||
// Insert into vectors table
|
||||
// Insert into vectors table with model version tracking
|
||||
const insertQuery = `
|
||||
INSERT OR REPLACE INTO vectors (doc_id, embedding, sqlite_id, doc_type, field_type, project, scope)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT OR REPLACE INTO vectors (doc_id, embedding, sqlite_id, doc_type, field_type, project, scope, model_version)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
|
||||
// Get current model version for tracking
|
||||
modelVersion := c.embedSvc.Version()
|
||||
|
||||
tx, err := c.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("begin transaction: %w", err)
|
||||
@@ -104,6 +107,7 @@ func (c *Client) AddDocuments(ctx context.Context, docs []Document) error {
|
||||
fieldType,
|
||||
project,
|
||||
scope,
|
||||
modelVersion,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("insert document %s: %w", doc.ID, err)
|
||||
@@ -114,7 +118,7 @@ func (c *Client) AddDocuments(ctx context.Context, docs []Document) error {
|
||||
return fmt.Errorf("commit transaction: %w", err)
|
||||
}
|
||||
|
||||
log.Debug().Int("count", len(docs)).Msg("Added documents to sqlite-vec")
|
||||
log.Debug().Int("count", len(docs)).Str("model", modelVersion).Msg("Added documents to sqlite-vec")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -212,6 +216,7 @@ func (c *Client) Query(ctx context.Context, query string, limit int, where map[s
|
||||
return nil, fmt.Errorf("scan row: %w", err)
|
||||
}
|
||||
|
||||
r.Similarity = DistanceToSimilarity(r.Distance)
|
||||
r.Metadata = map[string]any{
|
||||
"sqlite_id": float64(sqliteID), // Keep as float64 for compatibility
|
||||
"doc_type": docType.String,
|
||||
@@ -252,3 +257,148 @@ func truncateString(s string, maxLen int) string {
|
||||
}
|
||||
return s[:maxLen] + "..."
|
||||
}
|
||||
|
||||
// Count returns the total number of vectors in the store.
|
||||
func (c *Client) Count(ctx context.Context) (int64, error) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
var count int64
|
||||
err := c.db.QueryRowContext(ctx, "SELECT COUNT(*) FROM vectors").Scan(&count)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("count vectors: %w", err)
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// ModelVersion returns the current embedding model version.
|
||||
func (c *Client) ModelVersion() string {
|
||||
return c.embedSvc.Version()
|
||||
}
|
||||
|
||||
// NeedsRebuild checks if vectors need to be rebuilt due to model version change.
|
||||
// Returns true if:
|
||||
// - The vectors table is empty
|
||||
// - Any vectors have a different model_version than the current model
|
||||
func (c *Client) NeedsRebuild(ctx context.Context) (bool, string) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
currentModel := c.embedSvc.Version()
|
||||
|
||||
// Check total count
|
||||
var totalCount int64
|
||||
err := c.db.QueryRowContext(ctx, "SELECT COUNT(*) FROM vectors").Scan(&totalCount)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("Failed to count vectors for rebuild check")
|
||||
return false, ""
|
||||
}
|
||||
|
||||
if totalCount == 0 {
|
||||
return true, "empty"
|
||||
}
|
||||
|
||||
// Check for vectors with different model version
|
||||
var staleCount int64
|
||||
err = c.db.QueryRowContext(ctx,
|
||||
"SELECT COUNT(*) FROM vectors WHERE model_version != ? OR model_version IS NULL",
|
||||
currentModel,
|
||||
).Scan(&staleCount)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("Failed to count stale vectors")
|
||||
return false, ""
|
||||
}
|
||||
|
||||
if staleCount > 0 {
|
||||
return true, fmt.Sprintf("model_mismatch:%d", staleCount)
|
||||
}
|
||||
|
||||
return false, ""
|
||||
}
|
||||
|
||||
// StaleVectorInfo describes one row of the vectors table that has to be
// re-embedded. Each field mirrors the column of the same name; columns that
// are NULL in the database are reported as the Go zero value.
type StaleVectorInfo struct {
	// DocID is the vector's doc_id.
	DocID string
	// SQLiteID is the sqlite_id column (0 when NULL).
	SQLiteID int64
	// DocType is the doc_type column.
	DocType string
	// FieldType is the field_type column.
	FieldType string
	// Project is the project column.
	Project string
	// Scope is the scope column.
	Scope string
}
|
||||
|
||||
// GetStaleVectors returns doc_ids of vectors with mismatched or null model versions.
|
||||
// This enables granular rebuild - only re-embedding documents that need updating.
|
||||
func (c *Client) GetStaleVectors(ctx context.Context) ([]StaleVectorInfo, error) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
currentModel := c.embedSvc.Version()
|
||||
|
||||
query := `
|
||||
SELECT doc_id, sqlite_id, doc_type, field_type, project, scope
|
||||
FROM vectors
|
||||
WHERE model_version != ? OR model_version IS NULL
|
||||
`
|
||||
|
||||
rows, err := c.db.QueryContext(ctx, query, currentModel)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query stale vectors: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var results []StaleVectorInfo
|
||||
for rows.Next() {
|
||||
var info StaleVectorInfo
|
||||
var sqliteID sql.NullInt64
|
||||
var docType, fieldType, project, scope sql.NullString
|
||||
|
||||
if err := rows.Scan(&info.DocID, &sqliteID, &docType, &fieldType, &project, &scope); err != nil {
|
||||
return nil, fmt.Errorf("scan row: %w", err)
|
||||
}
|
||||
|
||||
info.SQLiteID = sqliteID.Int64
|
||||
info.DocType = docType.String
|
||||
info.FieldType = fieldType.String
|
||||
info.Project = project.String
|
||||
info.Scope = scope.String
|
||||
|
||||
results = append(results, info)
|
||||
}
|
||||
|
||||
if err = rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("iterate rows: %w", err)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// DeleteVectorsByDocIDs removes vectors by their doc_ids.
|
||||
// Used for granular rebuild - delete stale vectors before re-adding.
|
||||
func (c *Client) DeleteVectorsByDocIDs(ctx context.Context, docIDs []string) error {
|
||||
if len(docIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
// Build placeholder string
|
||||
placeholders := make([]string, len(docIDs))
|
||||
args := make([]interface{}, len(docIDs))
|
||||
for i, id := range docIDs {
|
||||
placeholders[i] = "?"
|
||||
args[i] = id
|
||||
}
|
||||
|
||||
// #nosec G201 -- Placeholders are "?" strings, actual values are parameterized via args
|
||||
query := fmt.Sprintf("DELETE FROM vectors WHERE doc_id IN (%s)",
|
||||
strings.Join(placeholders, ","))
|
||||
|
||||
_, err := c.db.ExecContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete vectors by doc_ids: %w", err)
|
||||
}
|
||||
|
||||
log.Debug().Int("count", len(docIDs)).Msg("Deleted stale vectors by doc_id")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -38,7 +38,8 @@ func testDB(t *testing.T) (*sql.DB, func()) {
|
||||
doc_type TEXT,
|
||||
field_type TEXT,
|
||||
project TEXT,
|
||||
scope TEXT
|
||||
scope TEXT,
|
||||
model_version TEXT
|
||||
)
|
||||
`)
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -19,9 +19,32 @@ type Document struct {
|
||||
|
||||
// QueryResult represents a search result from vector search.
type QueryResult struct {
	// ID is the document ID of the matched vector.
	ID string
	// Distance is the raw distance reported by the vector search.
	Distance float64
	// Similarity is derived from Distance: 1.0 = identical, 0.0 = opposite.
	Similarity float64
	// Metadata holds the stored attributes of the matched document.
	Metadata map[string]any
}
|
||||
|
||||
// DistanceToSimilarity maps a sqlite-vec cosine distance onto a similarity
// score by linear rescaling.
//
//	distance 0 (identical) -> similarity 1.0
//	distance 2 (opposite)  -> similarity 0.0
//
// The result is not clamped: a distance outside [0, 2] yields a similarity
// outside [0, 1].
func DistanceToSimilarity(distance float64) float64 {
	halved := distance / 2.0
	return 1.0 - halved
}
|
||||
|
||||
// FilterByThreshold filters results to only include those above the similarity threshold.
|
||||
// If maxResults > 0, also caps the number of results.
|
||||
func FilterByThreshold(results []QueryResult, threshold float64, maxResults int) []QueryResult {
|
||||
var filtered []QueryResult
|
||||
for _, r := range results {
|
||||
if r.Similarity >= threshold {
|
||||
filtered = append(filtered, r)
|
||||
if maxResults > 0 && len(filtered) >= maxResults {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// ExtractedIDs contains SQLite IDs extracted from query results, grouped by document type.
|
||||
|
||||
@@ -240,3 +240,101 @@ func (s *Sync) DeleteUserPrompts(ctx context.Context, promptIDs []int64) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SyncPattern syncs a single pattern to the vector store.
|
||||
func (s *Sync) SyncPattern(ctx context.Context, pattern *models.Pattern) error {
|
||||
docs := s.formatPatternDocs(pattern)
|
||||
if len(docs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
||||
return fmt.Errorf("add pattern docs: %w", err)
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Int64("patternId", pattern.ID).
|
||||
Int("docCount", len(docs)).
|
||||
Msg("Synced pattern to sqlite-vec")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatPatternDocs formats a pattern into vector documents.
|
||||
func (s *Sync) formatPatternDocs(pattern *models.Pattern) []Document {
|
||||
docs := make([]Document, 0, 3)
|
||||
|
||||
baseMetadata := map[string]any{
|
||||
"sqlite_id": pattern.ID,
|
||||
"doc_type": "pattern",
|
||||
"pattern_type": string(pattern.Type),
|
||||
"status": string(pattern.Status),
|
||||
"scope": "global", // Patterns are always global
|
||||
"frequency": pattern.Frequency,
|
||||
"confidence": pattern.Confidence,
|
||||
"created_at_epoch": pattern.CreatedAtEpoch,
|
||||
}
|
||||
|
||||
if len(pattern.Signature) > 0 {
|
||||
baseMetadata["signature"] = joinStrings(pattern.Signature, ",")
|
||||
}
|
||||
if len(pattern.Projects) > 0 {
|
||||
baseMetadata["projects"] = joinStrings(pattern.Projects, ",")
|
||||
}
|
||||
|
||||
// Pattern name as document
|
||||
if pattern.Name != "" {
|
||||
docs = append(docs, Document{
|
||||
ID: fmt.Sprintf("pattern_%d_name", pattern.ID),
|
||||
Content: pattern.Name,
|
||||
Metadata: copyMetadata(baseMetadata, "field_type", "name"),
|
||||
})
|
||||
}
|
||||
|
||||
// Pattern description as document
|
||||
if pattern.Description.Valid && pattern.Description.String != "" {
|
||||
docs = append(docs, Document{
|
||||
ID: fmt.Sprintf("pattern_%d_description", pattern.ID),
|
||||
Content: pattern.Description.String,
|
||||
Metadata: copyMetadata(baseMetadata, "field_type", "description"),
|
||||
})
|
||||
}
|
||||
|
||||
// Pattern recommendation as document
|
||||
if pattern.Recommendation.Valid && pattern.Recommendation.String != "" {
|
||||
docs = append(docs, Document{
|
||||
ID: fmt.Sprintf("pattern_%d_recommendation", pattern.ID),
|
||||
Content: pattern.Recommendation.String,
|
||||
Metadata: copyMetadata(baseMetadata, "field_type", "recommendation"),
|
||||
})
|
||||
}
|
||||
|
||||
return docs
|
||||
}
|
||||
|
||||
// DeletePatterns removes pattern documents from the vector store.
|
||||
func (s *Sync) DeletePatterns(ctx context.Context, patternIDs []int64) error {
|
||||
if len(patternIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generate all possible document IDs for these patterns
|
||||
// Pattern: pattern_{id}_name, pattern_{id}_description, pattern_{id}_recommendation
|
||||
ids := make([]string, 0, len(patternIDs)*3)
|
||||
|
||||
for _, patternID := range patternIDs {
|
||||
ids = append(ids, fmt.Sprintf("pattern_%d_name", patternID))
|
||||
ids = append(ids, fmt.Sprintf("pattern_%d_description", patternID))
|
||||
ids = append(ids, fmt.Sprintf("pattern_%d_recommendation", patternID))
|
||||
}
|
||||
|
||||
if err := s.client.DeleteDocuments(ctx, ids); err != nil {
|
||||
return fmt.Errorf("delete pattern docs: %w", err)
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Int("patternCount", len(patternIDs)).
|
||||
Msg("Deleted patterns from sqlite-vec")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user