Files
claude-mnemonic/internal/vector/sqlitevec/sync.go
T
lukaszraczylo 4f4b4ac70f feat(chunking): add AST-aware code chunking for Go, Python, TypeScript
- [x] Add language-specific chunkers with AST parsing (Go, Python, TypeScript)
- [x] Implement chunking manager to dispatch files to appropriate chunkers
- [x] Integrate code chunks into vector sync for semantic search
- [x] Add tree-sitter dependency for Python/TypeScript parsing
- [x] Reorder struct fields for consistency across codebase
- [x] Rename error variables to follow Go conventions (err → unmarshalErr, etc.)
- [x] Add code chunk metadata to vector documents (language, symbol name, line ranges)
- [x] Update worker service to initialize chunking pipeline with all three languages
2026-01-07 13:19:58 +00:00

455 lines
13 KiB
Go

// Package sqlitevec provides sqlite-vec based vector database integration for claude-mnemonic.
package sqlitevec
import (
	"context"
	"fmt"
	"sort"

	"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
	"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
	"github.com/rs/zerolog/log"
)
// Sync provides synchronization between SQLite data and vector embeddings.
// It turns observations, session summaries, user prompts and patterns into
// vector documents and adds them to / deletes them from the vector store
// through the wrapped client.
type Sync struct {
	// client is the vector store handle used for AddDocuments and DeleteDocuments.
	client *Client

	// chunkingManager is optional; NewSync leaves it nil and it is only set via
	// SetChunkingManager. When nil, SyncObservation emits no code chunk documents.
	chunkingManager *chunking.Manager
}
// NewSync returns a Sync that reads and writes vector documents through the
// given client. No chunking manager is attached; use SetChunkingManager to
// enable code chunk documents.
func NewSync(client *Client) *Sync {
	svc := new(Sync)
	svc.client = client
	return svc
}
// SetChunkingManager sets the code chunking manager (optional).
// If set, observations will include code chunks from tracked files
// (the FilesRead and FilesModified lists of each synced observation).
// Passing nil turns code chunking off again, since SyncObservation only
// chunks when the manager is non-nil.
//
// NOTE(review): the field is assigned without any locking — presumably this
// is only called during setup, before the Sync is used concurrently; confirm.
func (s *Sync) SetChunkingManager(manager *chunking.Manager) {
	s.chunkingManager = manager
}
// SyncObservation writes one observation to the vector store.
//
// The observation's own documents (narrative and facts) are always generated;
// code chunk documents for its tracked files are appended when a chunking
// manager has been configured. Nothing is written, and nil is returned, when
// no documents result. A failure from the client is returned wrapped.
func (s *Sync) SyncObservation(ctx context.Context, obs *models.Observation) error {
	batch := s.formatObservationDocs(obs)
	if s.chunkingManager != nil {
		batch = append(batch, s.formatCodeChunkDocs(ctx, obs)...)
	}

	total := len(batch)
	if total == 0 {
		return nil
	}

	addErr := s.client.AddDocuments(ctx, batch)
	if addErr != nil {
		return fmt.Errorf("add observation docs: %w", addErr)
	}

	log.Debug().
		Int64("observationId", obs.ID).
		Int("docCount", total).
		Msg("Synced observation to sqlite-vec")
	return nil
}
// formatObservationDocs converts an observation into granular vector
// documents: one for the narrative (when valid and non-empty) followed by one
// per fact, in fact order. Each document's metadata is derived from a shared
// observation-level base via copyMetadata / copyMetadataMulti, with a
// "field_type" marker added; fact documents also record their "fact_index".
func (s *Sync) formatObservationDocs(obs *models.Observation) []Document {
	// An unset scope is recorded as "project".
	scope := "project"
	if explicit := string(obs.Scope); explicit != "" {
		scope = explicit
	}

	shared := map[string]any{
		"doc_type":         "observation",
		"type":             string(obs.Type),
		"scope":            scope,
		"project":          obs.Project,
		"sqlite_id":        obs.ID,
		"sdk_session_id":   obs.SDKSessionID,
		"created_at_epoch": obs.CreatedAtEpoch,
	}

	// Optional descriptive fields are only present when they carry a value.
	if title := obs.Title; title.Valid {
		shared["title"] = title.String
	}
	if subtitle := obs.Subtitle; subtitle.Valid {
		shared["subtitle"] = subtitle.String
	}

	// List-valued fields are flattened to comma-separated strings.
	if len(obs.Concepts) != 0 {
		shared["concepts"] = joinStrings(obs.Concepts, ",")
	}
	if len(obs.FilesRead) != 0 {
		shared["files_read"] = joinStrings(obs.FilesRead, ",")
	}
	if len(obs.FilesModified) != 0 {
		shared["files_modified"] = joinStrings(obs.FilesModified, ",")
	}

	out := make([]Document, 0, len(obs.Facts)+2)

	// The narrative, when present, always comes first.
	if narrative := obs.Narrative; narrative.Valid && narrative.String != "" {
		out = append(out, Document{
			ID:       fmt.Sprintf("obs_%d_narrative", obs.ID),
			Content:  narrative.String,
			Metadata: copyMetadata(shared, "field_type", "narrative"),
		})
	}

	// One document per fact; the position in obs.Facts is part of the ID.
	for idx := range obs.Facts {
		factMeta := copyMetadataMulti(shared, map[string]any{
			"fact_index": idx,
			"field_type": "fact",
		})
		out = append(out, Document{
			ID:       fmt.Sprintf("obs_%d_fact_%d", obs.ID, idx),
			Content:  obs.Facts[idx],
			Metadata: factMeta,
		})
	}

	return out
}
// formatCodeChunkDocs builds vector documents for the code chunks extracted
// from the files an observation read or modified.
//
// Tracked files are de-duplicated (a path may appear in both FilesRead and
// FilesModified) and filtered to those the chunking manager supports before
// being chunked. Chunking errors are logged and skipped so that one bad file
// does not fail the whole sync.
//
// Files are emitted in sorted path order so that the generated document IDs
// (obs_{id}_chunk_{n}) do not depend on Go's randomized map iteration order
// and stay stable when the same observation is synced again.
//
// Returns nil when no chunking manager is configured or no tracked file is
// supported.
//
// NOTE(review): DeleteObservations in this file only generates chunk IDs for
// indices 0..99, so an observation producing more chunks than that will leave
// documents behind on delete unless they are cleaned up elsewhere.
func (s *Sync) formatCodeChunkDocs(ctx context.Context, obs *models.Observation) []Document {
	if s.chunkingManager == nil {
		return nil
	}

	// An unset scope is recorded as "project".
	scope := string(obs.Scope)
	if scope == "" {
		scope = "project"
	}

	// Collect all tracked files (read + modified).
	allFiles := make([]string, 0, len(obs.FilesRead)+len(obs.FilesModified))
	allFiles = append(allFiles, obs.FilesRead...)
	allFiles = append(allFiles, obs.FilesModified...)

	// Keep each supported file once, so a file that was both read and modified
	// is not parsed twice.
	seen := make(map[string]struct{}, len(allFiles))
	supportedFiles := make([]string, 0, len(allFiles))
	for _, file := range allFiles {
		if _, dup := seen[file]; dup {
			continue
		}
		seen[file] = struct{}{}
		if s.chunkingManager.SupportsFile(file) {
			supportedFiles = append(supportedFiles, file)
		}
	}
	if len(supportedFiles) == 0 {
		return nil
	}

	results, errs := s.chunkingManager.ChunkFiles(ctx, supportedFiles)
	// Log errors but don't fail the entire sync.
	for _, err := range errs {
		log.Warn().Err(err).Msg("Failed to chunk file")
	}

	// Sort the result keys: ranging over the map directly would hand out chunk
	// indices in a different order on every call.
	paths := make([]string, 0, len(results))
	totalChunks := 0
	for path, chunks := range results {
		paths = append(paths, path)
		totalChunks += len(chunks)
	}
	sort.Strings(paths)

	docs := make([]Document, 0, totalChunks)
	for _, filePath := range paths {
		for _, chunk := range results[filePath] {
			doc := Document{
				// len(docs) is the running chunk index across all files.
				ID:      fmt.Sprintf("obs_%d_chunk_%d", obs.ID, len(docs)),
				Content: chunk.SearchableContent(),
				Metadata: map[string]any{
					"sqlite_id":        obs.ID,
					"doc_type":         string(DocTypeCodeChunk),
					"field_type":       "code_chunk",
					"sdk_session_id":   obs.SDKSessionID,
					"project":          obs.Project,
					"scope":            scope,
					"created_at_epoch": obs.CreatedAtEpoch,
					// Code chunk specific metadata.
					"file_path":   filePath,
					"language":    string(chunk.Language),
					"chunk_type":  string(chunk.Type),
					"symbol_name": chunk.Name,
					"start_line":  chunk.StartLine,
					"end_line":    chunk.EndLine,
				},
			}

			// Optional fields are only recorded when the chunker supplied them.
			if chunk.ParentName != "" {
				doc.Metadata["parent_name"] = chunk.ParentName
			}
			if chunk.Signature != "" {
				doc.Metadata["signature"] = chunk.Signature
			}

			docs = append(docs, doc)
		}
	}

	if len(docs) > 0 {
		log.Debug().
			Int64("observationId", obs.ID).
			Int("codeChunks", len(docs)).
			Int("files", len(results)).
			Msg("Generated code chunk documents")
	}

	return docs
}
// SyncSummary writes one session summary to the vector store. Nothing is
// written, and nil is returned, when the summary yields no documents. A
// failure from the client is returned wrapped.
func (s *Sync) SyncSummary(ctx context.Context, summary *models.SessionSummary) error {
	batch := s.formatSummaryDocs(summary)
	total := len(batch)
	if total == 0 {
		return nil
	}

	addErr := s.client.AddDocuments(ctx, batch)
	if addErr != nil {
		return fmt.Errorf("add summary docs: %w", addErr)
	}

	log.Debug().
		Int64("summaryId", summary.ID).
		Int("docCount", total).
		Msg("Synced summary to sqlite-vec")
	return nil
}
// formatSummaryDocs converts a session summary into vector documents, one per
// populated text field (request, investigated, learned, completed, next_steps,
// notes — in that order). Fields that are not valid or are empty are skipped.
// Each document's metadata is derived from a shared base via copyMetadata with
// the field name added as "field_type".
func (s *Sync) formatSummaryDocs(summary *models.SessionSummary) []Document {
	shared := map[string]any{
		"doc_type":         "session_summary",
		"scope":            "", // Summaries don't have scope
		"project":          summary.Project,
		"sqlite_id":        summary.ID,
		"sdk_session_id":   summary.SDKSessionID,
		"created_at_epoch": summary.CreatedAtEpoch,
	}
	if number := summary.PromptNumber; number.Valid {
		shared["prompt_number"] = number.Int64
	}

	out := make([]Document, 0, 6)

	// emit appends a document for one summary field when it holds usable text.
	emit := func(name, text string, valid bool) {
		if !valid || text == "" {
			return
		}
		out = append(out, Document{
			ID:       fmt.Sprintf("summary_%d_%s", summary.ID, name),
			Content:  text,
			Metadata: copyMetadata(shared, "field_type", name),
		})
	}

	emit("request", summary.Request.String, summary.Request.Valid)
	emit("investigated", summary.Investigated.String, summary.Investigated.Valid)
	emit("learned", summary.Learned.String, summary.Learned.Valid)
	emit("completed", summary.Completed.String, summary.Completed.Valid)
	emit("next_steps", summary.NextSteps.String, summary.NextSteps.Valid)
	emit("notes", summary.Notes.String, summary.Notes.Valid)

	return out
}
// SyncUserPrompt writes one user prompt to the vector store as a single
// document whose content is the prompt text. A failure from the client is
// returned wrapped.
func (s *Sync) SyncUserPrompt(ctx context.Context, prompt *models.UserPromptWithSession) error {
	meta := map[string]any{
		"doc_type":         "user_prompt",
		"field_type":       "prompt",
		"scope":            "", // Prompts don't have scope
		"project":          prompt.Project,
		"sqlite_id":        prompt.ID,
		"sdk_session_id":   prompt.SDKSessionID,
		"prompt_number":    prompt.PromptNumber,
		"created_at_epoch": prompt.CreatedAtEpoch,
	}

	batch := []Document{{
		ID:       fmt.Sprintf("prompt_%d", prompt.ID),
		Content:  prompt.PromptText,
		Metadata: meta,
	}}

	addErr := s.client.AddDocuments(ctx, batch)
	if addErr != nil {
		return fmt.Errorf("add prompt doc: %w", addErr)
	}

	log.Debug().
		Int64("promptId", prompt.ID).
		Msg("Synced user prompt to sqlite-vec")
	return nil
}
// DeleteObservations removes the vector documents belonging to the given
// observations: the narrative, the fact documents and the code chunk
// documents. An empty ID list is a no-op.
//
// Document IDs are not looked up; every candidate ID is generated from the
// patterns obs_{id}_narrative, obs_{id}_fact_{n} and obs_{id}_chunk_{n} and
// handed to the client in one call.
//
// NOTE(review): only fact indices 0..19 and chunk indices 0..99 are generated,
// so documents with higher indices are not deleted by this method.
func (s *Sync) DeleteObservations(ctx context.Context, observationIDs []int64) error {
	const (
		maxFactsPerObs  = 20
		maxChunksPerObs = 100 // Reasonable upper bound for code chunks
	)

	if len(observationIDs) == 0 {
		return nil
	}

	// Numbered document families, in the order their IDs are emitted.
	families := [...]struct {
		kind  string
		limit int
	}{
		{"fact", maxFactsPerObs},
		{"chunk", maxChunksPerObs},
	}

	const perObservation = 1 + maxFactsPerObs + maxChunksPerObs
	ids := make([]string, 0, len(observationIDs)*perObservation)
	for _, id := range observationIDs {
		ids = append(ids, fmt.Sprintf("obs_%d_narrative", id))
		for _, family := range families {
			for n := 0; n < family.limit; n++ {
				ids = append(ids, fmt.Sprintf("obs_%d_%s_%d", id, family.kind, n))
			}
		}
	}

	delErr := s.client.DeleteDocuments(ctx, ids)
	if delErr != nil {
		return fmt.Errorf("delete observation docs: %w", delErr)
	}

	log.Debug().
		Int("observationCount", len(observationIDs)).
		Msg("Deleted observations from sqlite-vec")
	return nil
}
// DeleteUserPrompts removes the vector documents (prompt_{id}) for the given
// user prompts. An empty ID list is a no-op; a failure from the client is
// returned wrapped.
func (s *Sync) DeleteUserPrompts(ctx context.Context, promptIDs []int64) error {
	count := len(promptIDs)
	if count == 0 {
		return nil
	}

	ids := make([]string, 0, count)
	for _, id := range promptIDs {
		ids = append(ids, fmt.Sprintf("prompt_%d", id))
	}

	delErr := s.client.DeleteDocuments(ctx, ids)
	if delErr != nil {
		return fmt.Errorf("delete prompt docs: %w", delErr)
	}

	log.Debug().
		Int("promptCount", count).
		Msg("Deleted user prompts from sqlite-vec")
	return nil
}
// SyncPattern writes one pattern to the vector store. Nothing is written, and
// nil is returned, when the pattern yields no documents. A failure from the
// client is returned wrapped.
func (s *Sync) SyncPattern(ctx context.Context, pattern *models.Pattern) error {
	batch := s.formatPatternDocs(pattern)
	total := len(batch)
	if total == 0 {
		return nil
	}

	addErr := s.client.AddDocuments(ctx, batch)
	if addErr != nil {
		return fmt.Errorf("add pattern docs: %w", addErr)
	}

	log.Debug().
		Int64("patternId", pattern.ID).
		Int("docCount", total).
		Msg("Synced pattern to sqlite-vec")
	return nil
}
// formatPatternDocs converts a pattern into up to three vector documents —
// name, description and recommendation, in that order — skipping any that are
// empty (or, for the nullable fields, not valid). Each document's metadata is
// derived from a shared base via copyMetadata with the field name added as
// "field_type".
func (s *Sync) formatPatternDocs(pattern *models.Pattern) []Document {
	shared := map[string]any{
		"doc_type":         "pattern",
		"scope":            "global", // Patterns are always global
		"sqlite_id":        pattern.ID,
		"pattern_type":     string(pattern.Type),
		"status":           string(pattern.Status),
		"confidence":       pattern.Confidence,
		"frequency":        pattern.Frequency,
		"created_at_epoch": pattern.CreatedAtEpoch,
	}

	// List-valued fields are flattened to comma-separated strings.
	if len(pattern.Signature) != 0 {
		shared["signature"] = joinStrings(pattern.Signature, ",")
	}
	if len(pattern.Projects) != 0 {
		shared["projects"] = joinStrings(pattern.Projects, ",")
	}

	out := make([]Document, 0, 3)

	// emit appends a document for one pattern field when it has content.
	emit := func(fieldType, content string) {
		if content == "" {
			return
		}
		out = append(out, Document{
			ID:       fmt.Sprintf("pattern_%d_%s", pattern.ID, fieldType),
			Content:  content,
			Metadata: copyMetadata(shared, "field_type", fieldType),
		})
	}

	emit("name", pattern.Name)
	if pattern.Description.Valid {
		emit("description", pattern.Description.String)
	}
	if pattern.Recommendation.Valid {
		emit("recommendation", pattern.Recommendation.String)
	}

	return out
}
// DeletePatterns removes the vector documents for the given patterns. For each
// pattern the three possible document IDs (pattern_{id}_name,
// pattern_{id}_description, pattern_{id}_recommendation) are generated and
// handed to the client in one call. An empty ID list is a no-op; a failure
// from the client is returned wrapped.
func (s *Sync) DeletePatterns(ctx context.Context, patternIDs []int64) error {
	count := len(patternIDs)
	if count == 0 {
		return nil
	}

	// Field suffixes, in the order their IDs are emitted for each pattern.
	suffixes := [...]string{"name", "description", "recommendation"}

	ids := make([]string, 0, count*len(suffixes))
	for _, id := range patternIDs {
		for _, suffix := range suffixes {
			ids = append(ids, fmt.Sprintf("pattern_%d_%s", id, suffix))
		}
	}

	delErr := s.client.DeleteDocuments(ctx, ids)
	if delErr != nil {
		return fmt.Errorf("delete pattern docs: %w", delErr)
	}

	log.Debug().
		Int("patternCount", count).
		Msg("Deleted patterns from sqlite-vec")
	return nil
}