Files
claude-mnemonic/internal/vector/sqlitevec/helpers.go
T
lukaszraczylo 4f4b4ac70f feat(chunking): add AST-aware code chunking for Go, Python, TypeScript
- [x] Add language-specific chunkers with AST parsing (Go, Python, TypeScript)
- [x] Implement chunking manager to dispatch files to appropriate chunkers
- [x] Integrate code chunks into vector sync for semantic search
- [x] Add tree-sitter dependency for Python/TypeScript parsing
- [x] Reorder struct fields for consistency across codebase
- [x] Rename error variables to follow Go conventions (err → unmarshalErr, etc.)
- [x] Add code chunk metadata to vector documents (language, symbol name, line ranges)
- [x] Update worker service to initialize chunking pipeline with all three languages
2026-01-07 13:19:58 +00:00

259 lines
6.1 KiB
Go

// Package sqlitevec provides sqlite-vec based vector database integration for claude-mnemonic.
package sqlitevec
// DocType represents the type of document stored in the vector table.
// It is a plain string type, so a value converts to its metadata form with
// string(docType), which is how BuildWhereFilter writes it under "doc_type".
type DocType string
// Document type values. The Extract* helpers in this file compare these
// strings against the "doc_type" entry of a result's metadata.
const (
// DocTypeObservation tags observation documents (see ExtractObservationIDs).
DocTypeObservation DocType = "observation"
// DocTypeSessionSummary tags session summary documents (see ExtractSummaryIDs).
DocTypeSessionSummary DocType = "session_summary"
// DocTypeUserPrompt tags user prompt documents (see ExtractPromptIDs).
DocTypeUserPrompt DocType = "user_prompt"
// DocTypeCodeChunk tags code chunk documents. None of the Extract* helpers
// in this file collect IDs for this type.
DocTypeCodeChunk DocType = "code_chunk"
)
// Document represents a document to store with vector embedding.
type Document struct {
// Metadata holds arbitrary key/value pairs attached to the document.
// NOTE(review): presumably carries the same keys the Extract* helpers read
// back from QueryResult.Metadata ("sqlite_id", "doc_type", "project",
// "scope") — confirm against the code that builds documents.
Metadata map[string]any
// ID identifies the document.
ID string
// Content is the document text.
// NOTE(review): presumably the text that gets embedded — confirm against the store.
Content string
}
// QueryResult represents a search result from vector search.
type QueryResult struct {
// Metadata holds the key/value pairs returned with the match. The helpers
// in this file read "sqlite_id" (float64 or int64), "doc_type", "project"
// and "scope" from it; all other keys are ignored here.
Metadata map[string]any
// ID identifies the matched document.
ID string
// Distance is the raw distance reported for the match.
// NOTE(review): presumably sqlite-vec cosine distance in [0, 2], as
// DistanceToSimilarity expects — confirm at the query site.
Distance float64
// Similarity is the score FilterByThreshold compares against its threshold.
// NOTE(review): presumably DistanceToSimilarity(Distance) — confirm at the query site.
Similarity float64
}
// DistanceToSimilarity maps a sqlite-vec cosine distance onto a similarity
// score by linear rescaling.
//
// A distance of 0 (identical vectors) yields 1.0 and a distance of 2
// (opposite vectors) yields 0.0. The input is not clamped, so a distance
// outside [0, 2] produces a similarity outside [0, 1].
func DistanceToSimilarity(distance float64) float64 {
	// Largest cosine distance, reached by vectors pointing in opposite directions.
	const maxDistance = 2.0

	normalized := distance / maxDistance
	return 1.0 - normalized
}
// FilterByThreshold returns the results whose Similarity is at or above
// threshold, in their original order.
//
// When maxResults is greater than zero, scanning stops as soon as that many
// results have been kept; zero or a negative value means no cap. The input
// slice is not modified. If nothing qualifies the returned slice is nil.
func FilterByThreshold(results []QueryResult, threshold float64, maxResults int) []QueryResult {
	capped := maxResults > 0

	var kept []QueryResult
	for i := range results {
		if results[i].Similarity >= threshold {
			kept = append(kept, results[i])
			if capped && len(kept) >= maxResults {
				return kept
			}
		}
	}
	return kept
}
// ExtractedIDs contains SQLite IDs extracted from query results, grouped by document type.
// ExtractIDsByDocType fills each slice in first-seen order without duplicates;
// a slice stays nil when no result of that type was found. There is no field
// for DocTypeCodeChunk.
type ExtractedIDs struct {
// ObservationIDs are the IDs of results whose doc_type is DocTypeObservation.
ObservationIDs []int64
// SummaryIDs are the IDs of results whose doc_type is DocTypeSessionSummary.
SummaryIDs []int64
// PromptIDs are the IDs of results whose doc_type is DocTypeUserPrompt.
PromptIDs []int64
}
// BuildWhereFilter creates a where filter map for vector queries.
//
// A non-empty docType is stored under "doc_type" as a plain string, and a
// non-empty project under "project". Empty arguments add no entry, so passing
// both empty yields an empty (but non-nil, writable) map.
func BuildWhereFilter(docType DocType, project string) map[string]any {
	// At most two keys are ever written.
	where := make(map[string]any, 2)
	if docType != "" {
		where["doc_type"] = string(docType)
	}
	if project != "" {
		where["project"] = project
	}
	return where
}
// ExtractIDsByDocType extracts SQLite IDs from query results,
// grouped by document type and deduplicated.
//
// For each result the "sqlite_id" metadata entry is read as either a float64
// (truncated toward zero) or an int64 (used as-is); results with a missing ID
// or an ID of any other type are skipped. The "doc_type" entry selects the
// target slice; results whose doc_type is not observation, session summary or
// user prompt are ignored. IDs keep first-seen order within each group.
// The returned pointer is never nil.
func ExtractIDsByDocType(results []QueryResult) *ExtractedIDs {
	ids := &ExtractedIDs{}
	seenObs := make(map[int64]struct{})
	seenSummary := make(map[int64]struct{})
	seenPrompt := make(map[int64]struct{})

	for _, result := range results {
		var id int64
		switch v := result.Metadata["sqlite_id"].(type) {
		case float64:
			id = int64(v)
		case int64:
			// Keep the value untouched: a detour through float64 would
			// round IDs above 2^53.
			id = v
		default:
			continue
		}

		docType, _ := result.Metadata["doc_type"].(string)
		switch DocType(docType) {
		case DocTypeObservation:
			if _, dup := seenObs[id]; !dup {
				seenObs[id] = struct{}{}
				ids.ObservationIDs = append(ids.ObservationIDs, id)
			}
		case DocTypeSessionSummary:
			if _, dup := seenSummary[id]; !dup {
				seenSummary[id] = struct{}{}
				ids.SummaryIDs = append(ids.SummaryIDs, id)
			}
		case DocTypeUserPrompt:
			if _, dup := seenPrompt[id]; !dup {
				seenPrompt[id] = struct{}{}
				ids.PromptIDs = append(ids.PromptIDs, id)
			}
		}
	}
	return ids
}
// ExtractObservationIDs extracts observation SQLite IDs from query results,
// deduplicated and in first-seen order.
//
// Only results whose "doc_type" metadata is DocTypeObservation are considered.
// The "sqlite_id" entry is read as either a float64 (truncated toward zero) or
// an int64 (used as-is); results with a missing ID or an ID of any other type
// are skipped. When project is non-empty, a result is kept only if its
// "project" metadata equals project or its "scope" metadata is "global"; an
// empty project disables that filter. Returns nil when nothing matches.
func ExtractObservationIDs(results []QueryResult, project string) []int64 {
	var ids []int64
	seen := make(map[int64]struct{})

	for _, result := range results {
		var id int64
		switch v := result.Metadata["sqlite_id"].(type) {
		case float64:
			id = int64(v)
		case int64:
			// Keep the value untouched: a detour through float64 would
			// round IDs above 2^53.
			id = v
		default:
			continue
		}

		if docType, _ := result.Metadata["doc_type"].(string); docType != string(DocTypeObservation) {
			continue
		}

		if project != "" {
			proj, _ := result.Metadata["project"].(string)
			scope, _ := result.Metadata["scope"].(string)
			// Globally scoped observations are visible from every project.
			if proj != project && scope != "global" {
				continue
			}
		}

		if _, dup := seen[id]; dup {
			continue
		}
		seen[id] = struct{}{}
		ids = append(ids, id)
	}
	return ids
}
// ExtractSummaryIDs extracts session summary SQLite IDs from query results,
// deduplicated and in first-seen order.
//
// Only results whose "doc_type" metadata is DocTypeSessionSummary are
// considered. The "sqlite_id" entry is read as either a float64 (truncated
// toward zero) or an int64 (used as-is); results with a missing ID or an ID of
// any other type are skipped. When project is non-empty, a result is kept only
// if its "project" metadata equals project; an empty project disables that
// filter. Returns nil when nothing matches.
func ExtractSummaryIDs(results []QueryResult, project string) []int64 {
	var ids []int64
	seen := make(map[int64]struct{})

	for _, result := range results {
		var id int64
		switch v := result.Metadata["sqlite_id"].(type) {
		case float64:
			id = int64(v)
		case int64:
			// Keep the value untouched: a detour through float64 would
			// round IDs above 2^53.
			id = v
		default:
			continue
		}

		if docType, _ := result.Metadata["doc_type"].(string); docType != string(DocTypeSessionSummary) {
			continue
		}

		if project != "" {
			if proj, _ := result.Metadata["project"].(string); proj != project {
				continue
			}
		}

		if _, dup := seen[id]; dup {
			continue
		}
		seen[id] = struct{}{}
		ids = append(ids, id)
	}
	return ids
}
// ExtractPromptIDs extracts user prompt SQLite IDs from query results,
// deduplicated and in first-seen order.
//
// Only results whose "doc_type" metadata is DocTypeUserPrompt are considered.
// The "sqlite_id" entry is read as either a float64 (truncated toward zero) or
// an int64 (used as-is); results with a missing ID or an ID of any other type
// are skipped. When project is non-empty, a result is kept only if its
// "project" metadata equals project; an empty project disables that filter.
// Returns nil when nothing matches.
func ExtractPromptIDs(results []QueryResult, project string) []int64 {
	var ids []int64
	seen := make(map[int64]struct{})

	for _, result := range results {
		var id int64
		switch v := result.Metadata["sqlite_id"].(type) {
		case float64:
			id = int64(v)
		case int64:
			// Keep the value untouched: a detour through float64 would
			// round IDs above 2^53.
			id = v
		default:
			continue
		}

		if docType, _ := result.Metadata["doc_type"].(string); docType != string(DocTypeUserPrompt) {
			continue
		}

		if project != "" {
			if proj, _ := result.Metadata["project"].(string); proj != project {
				continue
			}
		}

		if _, dup := seen[id]; dup {
			continue
		}
		seen[id] = struct{}{}
		ids = append(ids, id)
	}
	return ids
}
// Helper functions for metadata manipulation.

// copyMetadata returns a new map holding every entry of base plus the given
// key/value pair. base is never written to. If key is already present in base,
// the copy carries value instead. Values are copied shallowly. A nil base
// yields a map containing only the new entry.
func copyMetadata(base map[string]any, key string, value any) map[string]any {
	out := make(map[string]any, 1+len(base))
	for name := range base {
		out[name] = base[name]
	}
	// Assigned after the copy so the explicit value wins over base.
	out[key] = value
	return out
}
// copyMetadataMulti returns a new map containing the entries of base overlaid
// with the entries of extra; on a key present in both, the value from extra
// wins. Neither input is written to and values are copied shallowly. The
// result is non-nil even when both inputs are nil.
func copyMetadataMulti(base map[string]any, extra map[string]any) map[string]any {
	merged := make(map[string]any, len(base)+len(extra))
	// Order matters: extra is applied last so it overrides base.
	for _, src := range [2]map[string]any{base, extra} {
		for k, v := range src {
			merged[k] = v
		}
	}
	return merged
}
// joinStrings concatenates the elements of strs, placing sep between
// consecutive elements. An empty or nil slice yields "" and a single element
// is returned unchanged. The result matches what strings.Join would produce.
//
// The output is assembled in one pre-sized buffer instead of growing a string
// with += on every iteration, which re-copied the accumulated prefix each time.
func joinStrings(strs []string, sep string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}

	// Exact final size: every element plus one separator per gap.
	size := len(sep) * (len(strs) - 1)
	for _, s := range strs {
		size += len(s)
	}

	buf := make([]byte, 0, size)
	buf = append(buf, strs[0]...)
	for _, s := range strs[1:] {
		buf = append(buf, sep...)
		buf = append(buf, s...)
	}
	return string(buf)
}