feat(chunking): add AST-aware code chunking for Go, Python, TypeScript

- [x] Add language-specific chunkers with AST parsing (Go, Python, TypeScript)
- [x] Implement chunking manager to dispatch files to appropriate chunkers
- [x] Integrate code chunks into vector sync for semantic search
- [x] Add tree-sitter dependency for Python/TypeScript parsing
- [x] Reorder struct fields for consistency across the codebase
- [x] Rename error variables from the generic `err` to descriptive, call-specific names (err → unmarshalErr, serErr, scanErr, etc.)
- [x] Add code chunk metadata to vector documents (language, symbol name, line ranges)
- [x] Update worker service to initialize chunking pipeline with all three languages
This commit is contained in:
2026-01-07 13:19:58 +00:00
parent 40a44a71eb
commit 4f4b4ac70f
78 changed files with 2313 additions and 652 deletions
+8 -8
View File
@@ -87,9 +87,9 @@ func (c *Client) AddDocuments(ctx context.Context, docs []Document) error {
for i, doc := range docs {
// Serialize embedding to blob format
embBlob, err := sqlite_vec.SerializeFloat32(embeddings[i])
if err != nil {
return fmt.Errorf("serialize embedding for %s: %w", doc.ID, err)
embBlob, serErr := sqlite_vec.SerializeFloat32(embeddings[i])
if serErr != nil {
return fmt.Errorf("serialize embedding for %s: %w", doc.ID, serErr)
}
// Extract metadata
@@ -212,8 +212,8 @@ func (c *Client) Query(ctx context.Context, query string, limit int, where map[s
var sqliteID int64
var docType, fieldType, project, scope sql.NullString
if err := rows.Scan(&r.ID, &r.Distance, &sqliteID, &docType, &fieldType, &project, &scope); err != nil {
return nil, fmt.Errorf("scan row: %w", err)
if scanErr := rows.Scan(&r.ID, &r.Distance, &sqliteID, &docType, &fieldType, &project, &scope); scanErr != nil {
return nil, fmt.Errorf("scan row: %w", scanErr)
}
r.Similarity = DistanceToSimilarity(r.Distance)
@@ -319,11 +319,11 @@ func (c *Client) NeedsRebuild(ctx context.Context) (bool, string) {
// StaleVectorInfo contains information about a vector that needs rebuilding.
type StaleVectorInfo struct {
DocID string
SQLiteID int64
DocType string
FieldType string
Project string
Scope string
SQLiteID int64
}
// GetStaleVectors returns doc_ids of vectors with mismatched or null model versions.
@@ -352,8 +352,8 @@ func (c *Client) GetStaleVectors(ctx context.Context) ([]StaleVectorInfo, error)
var sqliteID sql.NullInt64
var docType, fieldType, project, scope sql.NullString
if err := rows.Scan(&info.DocID, &sqliteID, &docType, &fieldType, &project, &scope); err != nil {
return nil, fmt.Errorf("scan row: %w", err)
if scanErr := rows.Scan(&info.DocID, &sqliteID, &docType, &fieldType, &project, &scope); scanErr != nil {
return nil, fmt.Errorf("scan row: %w", scanErr)
}
info.SQLiteID = sqliteID.Int64