feat(chunking): add AST-aware code chunking for Go, Python, TypeScript

- [x] Add language-specific chunkers with AST parsing (Go, Python, TypeScript)
- [x] Implement chunking manager to dispatch files to appropriate chunkers
- [x] Integrate code chunks into vector sync for semantic search
- [x] Add tree-sitter dependency for Python/TypeScript parsing
- [x] Reorder struct fields for consistency across codebase
- [x] Rename error variables to follow Go conventions (err → unmarshalErr, etc.)
- [x] Add code chunk metadata to vector documents (language, symbol name, line ranges)
- [x] Update worker service to initialize chunking pipeline with all three languages
This commit is contained in:
2026-01-07 13:19:58 +00:00
parent 40a44a71eb
commit 4f4b4ac70f
78 changed files with 2313 additions and 652 deletions
+4 -3
View File
@@ -8,21 +8,22 @@ const (
DocTypeObservation DocType = "observation"
DocTypeSessionSummary DocType = "session_summary"
DocTypeUserPrompt DocType = "user_prompt"
DocTypeCodeChunk DocType = "code_chunk"
)
// Document represents a document to store with vector embedding.
type Document struct {
Metadata map[string]any
ID string
Content string
Metadata map[string]any
}
// QueryResult represents a search result from vector search.
type QueryResult struct {
Metadata map[string]any
ID string
Distance float64
Similarity float64 // 1.0 = identical, 0.0 = opposite (derived from distance)
Metadata map[string]any
Similarity float64
}
// DistanceToSimilarity converts sqlite-vec cosine distance to similarity score.