Resolves issue #13

- Switched model to bge-small-en-v1.5
- Added lazy re-embedding
- Added model version tracking per vector
- Added conversion of vectors to the new model
This commit is contained in:
2025-12-19 02:00:55 +00:00
parent 8867f13dcc
commit a37649bc69
18 changed files with 1072 additions and 87 deletions
+21
View File
@@ -283,6 +283,27 @@ var Migrations = []Migration{
ON user_prompts(claude_session_id, prompt_number);
`,
},
// Migration 19: rebuild the vectors table with an added model_version column.
// The table is dropped and recreated instead of altered (see the SQL comment
// below), so any rows stored in the previous vectors table are discarded when
// this migration runs. The embedding column is declared as float[384].
{
Version: 19,
Name: "vectors_with_model_version",
SQL: `
-- Drop old vectors table (virtual tables cannot be altered)
DROP TABLE IF EXISTS vectors;
-- Recreate vectors table with model_version column
-- Uses bge-small-en-v1.5 embeddings (384 dimensions)
CREATE VIRTUAL TABLE IF NOT EXISTS vectors USING vec0(
doc_id TEXT PRIMARY KEY,
embedding float[384],
sqlite_id INTEGER,
doc_type TEXT,
field_type TEXT,
project TEXT,
scope TEXT,
model_version TEXT
);
`,
},
}
// MigrationManager handles database schema migrations.
+19
View File
@@ -229,6 +229,25 @@ func (s *ObservationStore) GetAllRecentObservations(ctx context.Context, limit i
return scanObservationRows(rows)
}
// GetAllObservations loads every row of the observations table, ordered by
// id, with no project filter and no limit. A NULL scope is reported as
// 'project'. It exists to support a full vector rebuild; row decoding is
// delegated to scanObservationRows.
func (s *ObservationStore) GetAllObservations(ctx context.Context) ([]*models.Observation, error) {
	const selectAllObservations = `
SELECT id, sdk_session_id, project, COALESCE(scope, 'project') as scope, type, title, subtitle, facts, narrative,
concepts, files_read, files_modified, file_mtimes, prompt_number, discovery_tokens,
created_at, created_at_epoch
FROM observations
ORDER BY id
`

	result, err := s.store.QueryContext(ctx, selectAllObservations)
	if err != nil {
		return nil, err
	}
	// Release the result set once scanning has finished.
	defer result.Close()

	observations, scanErr := scanObservationRows(result)
	return observations, scanErr
}
// SearchObservationsFTS performs full-text search on observations.
func (s *ObservationStore) SearchObservationsFTS(ctx context.Context, query, project string, limit int) ([]*models.Observation, error) {
if limit <= 0 {
+22
View File
@@ -199,6 +199,28 @@ func (s *PromptStore) GetAllRecentUserPrompts(ctx context.Context, limit int) ([
return scanPromptWithSessionRows(rows)
}
// GetAllPrompts loads every row of user_prompts, ordered by prompt id, with
// no limit. Each prompt is LEFT JOINed to sdk_sessions on claude_session_id;
// NULL project and sdk_session_id values come back as empty strings and a
// NULL matched_observations comes back as 0. It exists to support a full
// vector rebuild; row decoding is delegated to scanPromptWithSessionRows.
func (s *PromptStore) GetAllPrompts(ctx context.Context) ([]*models.UserPromptWithSession, error) {
	const selectAllPrompts = `
SELECT up.id, up.claude_session_id, up.prompt_number, up.prompt_text,
COALESCE(up.matched_observations, 0) as matched_observations,
up.created_at, up.created_at_epoch,
COALESCE(s.project, '') as project,
COALESCE(s.sdk_session_id, '') as sdk_session_id
FROM user_prompts up
LEFT JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
ORDER BY up.id
`

	result, err := s.store.QueryContext(ctx, selectAllPrompts)
	if err != nil {
		return nil, err
	}
	// Release the result set once scanning has finished.
	defer result.Close()

	prompts, scanErr := scanPromptWithSessionRows(result)
	return prompts, scanErr
}
// FindRecentPromptByText finds a prompt with the same text for a session within the last few seconds.
// This is used to detect duplicate hook invocations.
// Returns (promptID, promptNumber, found).
+18
View File
@@ -116,3 +116,21 @@ func (s *SummaryStore) GetAllRecentSummaries(ctx context.Context, limit int) ([]
return scanSummaryRows(rows)
}
// GetAllSummaries loads every row of session_summaries, ordered by id, with
// no project filter and no limit. It exists to support a full vector
// rebuild; row decoding is delegated to scanSummaryRows.
func (s *SummaryStore) GetAllSummaries(ctx context.Context) ([]*models.SessionSummary, error) {
	const selectAllSummaries = `
SELECT id, sdk_session_id, project, request, investigated, learned, completed,
next_steps, notes, prompt_number, discovery_tokens, created_at, created_at_epoch
FROM session_summaries
ORDER BY id
`

	result, err := s.store.QueryContext(ctx, selectAllSummaries)
	if err != nil {
		return nil, err
	}
	// Release the result set once scanning has finished.
	defer result.Close()

	summaries, scanErr := scanSummaryRows(result)
	return summaries, scanErr
}