mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-05 23:03:55 +00:00
525 lines
14 KiB
Go
525 lines
14 KiB
Go
// Package sqlitevec provides sqlite-vec based vector database integration for claude-mnemonic.
|
|
package sqlitevec
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// Sync provides synchronization between SQLite data and vector embeddings.
|
|
type Sync struct {
|
|
client *Client
|
|
}
|
|
|
|
// NewSync creates a new sync service.
|
|
func NewSync(client *Client) *Sync {
|
|
return &Sync{client: client}
|
|
}
|
|
|
|
// SyncObservation syncs a single observation to the vector store.
|
|
func (s *Sync) SyncObservation(ctx context.Context, obs *models.Observation) error {
|
|
docs := s.formatObservationDocs(obs)
|
|
if len(docs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
|
return fmt.Errorf("add observation docs: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int64("observationId", obs.ID).
|
|
Int("docCount", len(docs)).
|
|
Msg("Synced observation to sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// formatObservationDocs formats an observation into vector documents.
|
|
// Each semantic field becomes a separate vector document (granular approach).
|
|
func (s *Sync) formatObservationDocs(obs *models.Observation) []Document {
|
|
docs := make([]Document, 0, len(obs.Facts)+2)
|
|
|
|
// Determine scope for metadata
|
|
scope := string(obs.Scope)
|
|
if scope == "" {
|
|
scope = "project"
|
|
}
|
|
|
|
baseMetadata := map[string]any{
|
|
"sqlite_id": obs.ID,
|
|
"doc_type": "observation",
|
|
"sdk_session_id": obs.SDKSessionID,
|
|
"project": obs.Project,
|
|
"scope": scope,
|
|
"created_at_epoch": obs.CreatedAtEpoch,
|
|
"type": string(obs.Type),
|
|
}
|
|
|
|
if obs.Title.Valid {
|
|
baseMetadata["title"] = obs.Title.String
|
|
}
|
|
if obs.Subtitle.Valid {
|
|
baseMetadata["subtitle"] = obs.Subtitle.String
|
|
}
|
|
if len(obs.Concepts) > 0 {
|
|
baseMetadata["concepts"] = joinStrings(obs.Concepts, ",")
|
|
}
|
|
if len(obs.FilesRead) > 0 {
|
|
baseMetadata["files_read"] = joinStrings(obs.FilesRead, ",")
|
|
}
|
|
if len(obs.FilesModified) > 0 {
|
|
baseMetadata["files_modified"] = joinStrings(obs.FilesModified, ",")
|
|
}
|
|
|
|
// Narrative as separate document
|
|
if obs.Narrative.Valid && obs.Narrative.String != "" {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("obs_%d_narrative", obs.ID),
|
|
Content: obs.Narrative.String,
|
|
Metadata: copyMetadata(baseMetadata, "field_type", "narrative"),
|
|
})
|
|
}
|
|
|
|
// Each fact as separate document
|
|
for i, fact := range obs.Facts {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("obs_%d_fact_%d", obs.ID, i),
|
|
Content: fact,
|
|
Metadata: copyMetadataMulti(baseMetadata, map[string]any{
|
|
"field_type": "fact",
|
|
"fact_index": i,
|
|
}),
|
|
})
|
|
}
|
|
|
|
return docs
|
|
}
|
|
|
|
// SyncSummary syncs a single session summary to the vector store.
|
|
func (s *Sync) SyncSummary(ctx context.Context, summary *models.SessionSummary) error {
|
|
docs := s.formatSummaryDocs(summary)
|
|
if len(docs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
|
return fmt.Errorf("add summary docs: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int64("summaryId", summary.ID).
|
|
Int("docCount", len(docs)).
|
|
Msg("Synced summary to sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// formatSummaryDocs formats a session summary into vector documents.
|
|
func (s *Sync) formatSummaryDocs(summary *models.SessionSummary) []Document {
|
|
docs := make([]Document, 0, 6)
|
|
|
|
baseMetadata := map[string]any{
|
|
"sqlite_id": summary.ID,
|
|
"doc_type": "session_summary",
|
|
"sdk_session_id": summary.SDKSessionID,
|
|
"project": summary.Project,
|
|
"scope": "", // Summaries don't have scope
|
|
"created_at_epoch": summary.CreatedAtEpoch,
|
|
}
|
|
|
|
if summary.PromptNumber.Valid {
|
|
baseMetadata["prompt_number"] = summary.PromptNumber.Int64
|
|
}
|
|
|
|
// Each field as separate document
|
|
fields := []struct {
|
|
name string
|
|
value string
|
|
valid bool
|
|
}{
|
|
{"request", summary.Request.String, summary.Request.Valid},
|
|
{"investigated", summary.Investigated.String, summary.Investigated.Valid},
|
|
{"learned", summary.Learned.String, summary.Learned.Valid},
|
|
{"completed", summary.Completed.String, summary.Completed.Valid},
|
|
{"next_steps", summary.NextSteps.String, summary.NextSteps.Valid},
|
|
{"notes", summary.Notes.String, summary.Notes.Valid},
|
|
}
|
|
|
|
for _, field := range fields {
|
|
if field.valid && field.value != "" {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("summary_%d_%s", summary.ID, field.name),
|
|
Content: field.value,
|
|
Metadata: copyMetadata(baseMetadata, "field_type", field.name),
|
|
})
|
|
}
|
|
}
|
|
|
|
return docs
|
|
}
|
|
|
|
// SyncUserPrompt syncs a single user prompt to the vector store.
|
|
func (s *Sync) SyncUserPrompt(ctx context.Context, prompt *models.UserPromptWithSession) error {
|
|
doc := Document{
|
|
ID: fmt.Sprintf("prompt_%d", prompt.ID),
|
|
Content: prompt.PromptText,
|
|
Metadata: map[string]any{
|
|
"sqlite_id": prompt.ID,
|
|
"doc_type": "user_prompt",
|
|
"sdk_session_id": prompt.SDKSessionID,
|
|
"project": prompt.Project,
|
|
"scope": "", // Prompts don't have scope
|
|
"created_at_epoch": prompt.CreatedAtEpoch,
|
|
"prompt_number": prompt.PromptNumber,
|
|
"field_type": "prompt",
|
|
},
|
|
}
|
|
|
|
if err := s.client.AddDocuments(ctx, []Document{doc}); err != nil {
|
|
return fmt.Errorf("add prompt doc: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int64("promptId", prompt.ID).
|
|
Msg("Synced user prompt to sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// DeleteObservations removes observation documents from the vector store.
|
|
func (s *Sync) DeleteObservations(ctx context.Context, observationIDs []int64) error {
|
|
if len(observationIDs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Generate all possible document IDs for these observations
|
|
// Pattern: obs_{id}_narrative, obs_{id}_fact_{0..n}
|
|
const maxFactsPerObs = 20
|
|
ids := make([]string, 0, len(observationIDs)*(maxFactsPerObs+1))
|
|
|
|
for _, obsID := range observationIDs {
|
|
ids = append(ids, fmt.Sprintf("obs_%d_narrative", obsID))
|
|
for i := 0; i < maxFactsPerObs; i++ {
|
|
ids = append(ids, fmt.Sprintf("obs_%d_fact_%d", obsID, i))
|
|
}
|
|
}
|
|
|
|
if err := s.client.DeleteDocuments(ctx, ids); err != nil {
|
|
return fmt.Errorf("delete observation docs: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int("observationCount", len(observationIDs)).
|
|
Msg("Deleted observations from sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// DeleteUserPrompts removes user prompt documents from the vector store.
|
|
func (s *Sync) DeleteUserPrompts(ctx context.Context, promptIDs []int64) error {
|
|
if len(promptIDs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
ids := make([]string, len(promptIDs))
|
|
for i, promptID := range promptIDs {
|
|
ids[i] = fmt.Sprintf("prompt_%d", promptID)
|
|
}
|
|
|
|
if err := s.client.DeleteDocuments(ctx, ids); err != nil {
|
|
return fmt.Errorf("delete prompt docs: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int("promptCount", len(promptIDs)).
|
|
Msg("Deleted user prompts from sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// SyncPattern syncs a single pattern to the vector store.
|
|
func (s *Sync) SyncPattern(ctx context.Context, pattern *models.Pattern) error {
|
|
docs := s.formatPatternDocs(pattern)
|
|
if len(docs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
|
return fmt.Errorf("add pattern docs: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int64("patternId", pattern.ID).
|
|
Int("docCount", len(docs)).
|
|
Msg("Synced pattern to sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// formatPatternDocs formats a pattern into vector documents.
|
|
func (s *Sync) formatPatternDocs(pattern *models.Pattern) []Document {
|
|
docs := make([]Document, 0, 3)
|
|
|
|
baseMetadata := map[string]any{
|
|
"sqlite_id": pattern.ID,
|
|
"doc_type": "pattern",
|
|
"pattern_type": string(pattern.Type),
|
|
"status": string(pattern.Status),
|
|
"scope": "global", // Patterns are always global
|
|
"frequency": pattern.Frequency,
|
|
"confidence": pattern.Confidence,
|
|
"created_at_epoch": pattern.CreatedAtEpoch,
|
|
}
|
|
|
|
if len(pattern.Signature) > 0 {
|
|
baseMetadata["signature"] = joinStrings(pattern.Signature, ",")
|
|
}
|
|
if len(pattern.Projects) > 0 {
|
|
baseMetadata["projects"] = joinStrings(pattern.Projects, ",")
|
|
}
|
|
|
|
// Pattern name as document
|
|
if pattern.Name != "" {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("pattern_%d_name", pattern.ID),
|
|
Content: pattern.Name,
|
|
Metadata: copyMetadata(baseMetadata, "field_type", "name"),
|
|
})
|
|
}
|
|
|
|
// Pattern description as document
|
|
if pattern.Description.Valid && pattern.Description.String != "" {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("pattern_%d_description", pattern.ID),
|
|
Content: pattern.Description.String,
|
|
Metadata: copyMetadata(baseMetadata, "field_type", "description"),
|
|
})
|
|
}
|
|
|
|
// Pattern recommendation as document
|
|
if pattern.Recommendation.Valid && pattern.Recommendation.String != "" {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("pattern_%d_recommendation", pattern.ID),
|
|
Content: pattern.Recommendation.String,
|
|
Metadata: copyMetadata(baseMetadata, "field_type", "recommendation"),
|
|
})
|
|
}
|
|
|
|
return docs
|
|
}
|
|
|
|
// DeletePatterns removes pattern documents from the vector store.
|
|
func (s *Sync) DeletePatterns(ctx context.Context, patternIDs []int64) error {
|
|
if len(patternIDs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Generate all possible document IDs for these patterns
|
|
// Pattern: pattern_{id}_name, pattern_{id}_description, pattern_{id}_recommendation
|
|
ids := make([]string, 0, len(patternIDs)*3)
|
|
|
|
for _, patternID := range patternIDs {
|
|
ids = append(ids, fmt.Sprintf("pattern_%d_name", patternID))
|
|
ids = append(ids, fmt.Sprintf("pattern_%d_description", patternID))
|
|
ids = append(ids, fmt.Sprintf("pattern_%d_recommendation", patternID))
|
|
}
|
|
|
|
if err := s.client.DeleteDocuments(ctx, ids); err != nil {
|
|
return fmt.Errorf("delete pattern docs: %w", err)
|
|
}
|
|
|
|
log.Debug().
|
|
Int("patternCount", len(patternIDs)).
|
|
Msg("Deleted patterns from sqlite-vec")
|
|
|
|
return nil
|
|
}
|
|
|
|
// BatchSyncConfig tunes the batch synchronization methods on Sync.
type BatchSyncConfig struct {
	// BatchSize is the number of items per batch (default: 50).
	BatchSize int
	// ProgressLogFreq controls progress logging: log every N items
	// (default: 100).
	ProgressLogFreq int
}
|
|
|
|
// DefaultBatchSyncConfig returns sensible defaults for batch sync.
|
|
func DefaultBatchSyncConfig() BatchSyncConfig {
|
|
return BatchSyncConfig{
|
|
BatchSize: 50,
|
|
ProgressLogFreq: 100,
|
|
}
|
|
}
|
|
|
|
// BatchSyncObservations syncs multiple observations efficiently in batches.
|
|
// This reduces memory pressure during large rebuilds by processing in chunks.
|
|
func (s *Sync) BatchSyncObservations(ctx context.Context, observations []*models.Observation, cfg BatchSyncConfig) (synced int, errors int) {
|
|
if len(observations) == 0 {
|
|
return 0, 0
|
|
}
|
|
|
|
if cfg.BatchSize <= 0 {
|
|
cfg.BatchSize = 50
|
|
}
|
|
if cfg.ProgressLogFreq <= 0 {
|
|
cfg.ProgressLogFreq = 100
|
|
}
|
|
|
|
for i := 0; i < len(observations); i += cfg.BatchSize {
|
|
// Check context cancellation
|
|
select {
|
|
case <-ctx.Done():
|
|
log.Warn().Int("synced", synced).Int("remaining", len(observations)-i).Msg("Batch sync cancelled")
|
|
return synced, errors
|
|
default:
|
|
}
|
|
|
|
end := min(i+cfg.BatchSize, len(observations))
|
|
|
|
batch := observations[i:end]
|
|
var docs []Document
|
|
|
|
// Collect all documents for this batch
|
|
for _, obs := range batch {
|
|
docs = append(docs, s.formatObservationDocs(obs)...)
|
|
}
|
|
|
|
// Add all documents in one call
|
|
if len(docs) > 0 {
|
|
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
|
log.Warn().Err(err).Int("batchStart", i).Int("batchSize", len(batch)).Msg("Failed to sync observation batch")
|
|
errors += len(batch)
|
|
continue
|
|
}
|
|
}
|
|
|
|
synced += len(batch)
|
|
|
|
// Log progress periodically
|
|
if synced%cfg.ProgressLogFreq == 0 || synced == len(observations) {
|
|
log.Debug().Int("synced", synced).Int("total", len(observations)).Msg("Observation batch sync progress")
|
|
}
|
|
}
|
|
|
|
return synced, errors
|
|
}
|
|
|
|
// BatchSyncSummaries syncs multiple summaries efficiently in batches.
|
|
func (s *Sync) BatchSyncSummaries(ctx context.Context, summaries []*models.SessionSummary, cfg BatchSyncConfig) (synced int, errors int) {
|
|
if len(summaries) == 0 {
|
|
return 0, 0
|
|
}
|
|
|
|
if cfg.BatchSize <= 0 {
|
|
cfg.BatchSize = 50
|
|
}
|
|
if cfg.ProgressLogFreq <= 0 {
|
|
cfg.ProgressLogFreq = 100
|
|
}
|
|
|
|
for i := 0; i < len(summaries); i += cfg.BatchSize {
|
|
// Check context cancellation
|
|
select {
|
|
case <-ctx.Done():
|
|
log.Warn().Int("synced", synced).Int("remaining", len(summaries)-i).Msg("Batch sync cancelled")
|
|
return synced, errors
|
|
default:
|
|
}
|
|
|
|
end := min(i+cfg.BatchSize, len(summaries))
|
|
|
|
batch := summaries[i:end]
|
|
var docs []Document
|
|
|
|
// Collect all documents for this batch
|
|
for _, summary := range batch {
|
|
docs = append(docs, s.formatSummaryDocs(summary)...)
|
|
}
|
|
|
|
// Add all documents in one call
|
|
if len(docs) > 0 {
|
|
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
|
log.Warn().Err(err).Int("batchStart", i).Int("batchSize", len(batch)).Msg("Failed to sync summary batch")
|
|
errors += len(batch)
|
|
continue
|
|
}
|
|
}
|
|
|
|
synced += len(batch)
|
|
|
|
// Log progress periodically
|
|
if synced%cfg.ProgressLogFreq == 0 || synced == len(summaries) {
|
|
log.Debug().Int("synced", synced).Int("total", len(summaries)).Msg("Summary batch sync progress")
|
|
}
|
|
}
|
|
|
|
return synced, errors
|
|
}
|
|
|
|
// BatchSyncPrompts syncs multiple user prompts efficiently in batches.
|
|
func (s *Sync) BatchSyncPrompts(ctx context.Context, prompts []*models.UserPromptWithSession, cfg BatchSyncConfig) (synced int, errors int) {
|
|
if len(prompts) == 0 {
|
|
return 0, 0
|
|
}
|
|
|
|
if cfg.BatchSize <= 0 {
|
|
cfg.BatchSize = 50
|
|
}
|
|
if cfg.ProgressLogFreq <= 0 {
|
|
cfg.ProgressLogFreq = 100
|
|
}
|
|
|
|
for i := 0; i < len(prompts); i += cfg.BatchSize {
|
|
// Check context cancellation
|
|
select {
|
|
case <-ctx.Done():
|
|
log.Warn().Int("synced", synced).Int("remaining", len(prompts)-i).Msg("Batch sync cancelled")
|
|
return synced, errors
|
|
default:
|
|
}
|
|
|
|
end := min(i+cfg.BatchSize, len(prompts))
|
|
|
|
batch := prompts[i:end]
|
|
docs := make([]Document, 0, len(batch))
|
|
|
|
// Collect all documents for this batch
|
|
for _, prompt := range batch {
|
|
docs = append(docs, Document{
|
|
ID: fmt.Sprintf("prompt_%d", prompt.ID),
|
|
Content: prompt.PromptText,
|
|
Metadata: map[string]any{
|
|
"sqlite_id": prompt.ID,
|
|
"doc_type": "user_prompt",
|
|
"sdk_session_id": prompt.SDKSessionID,
|
|
"project": prompt.Project,
|
|
"scope": "",
|
|
"created_at_epoch": prompt.CreatedAtEpoch,
|
|
"prompt_number": prompt.PromptNumber,
|
|
"field_type": "prompt",
|
|
},
|
|
})
|
|
}
|
|
|
|
// Add all documents in one call
|
|
if len(docs) > 0 {
|
|
if err := s.client.AddDocuments(ctx, docs); err != nil {
|
|
log.Warn().Err(err).Int("batchStart", i).Int("batchSize", len(batch)).Msg("Failed to sync prompt batch")
|
|
errors += len(batch)
|
|
continue
|
|
}
|
|
}
|
|
|
|
synced += len(batch)
|
|
|
|
// Log progress periodically
|
|
if synced%cfg.ProgressLogFreq == 0 || synced == len(prompts) {
|
|
log.Debug().Int("synced", synced).Int("total", len(prompts)).Msg("Prompt batch sync progress")
|
|
}
|
|
}
|
|
|
|
return synced, errors
|
|
}
|