From af19b6a798a055fae041568dcc78366368691ce6 Mon Sep 17 00:00:00 2001 From: Lukasz Raczylo Date: Fri, 19 Jun 2026 14:01:42 +0100 Subject: [PATCH] refactor: remove dead internal/chunking package The chunking Manager had zero production callers (only its own tests). Removed the package and ran go mod tidy, dropping the now-orphaned go-tree-sitter dependency. --- go.mod | 1 - go.sum | 2 - internal/chunking/golang/chunker.go | 285 ------------- internal/chunking/golang/chunker_test.go | 214 ---------- internal/chunking/manager.go | 106 ----- internal/chunking/manager_test.go | 162 -------- internal/chunking/python/chunker.go | 291 ------------- internal/chunking/python/chunker_test.go | 298 -------------- internal/chunking/types.go | 140 ------- internal/chunking/types_test.go | 213 ---------- internal/chunking/typescript/chunker.go | 403 ------------------- internal/chunking/typescript/chunker_test.go | 398 ------------------ 12 files changed, 2513 deletions(-) delete mode 100644 internal/chunking/golang/chunker.go delete mode 100644 internal/chunking/golang/chunker_test.go delete mode 100644 internal/chunking/manager.go delete mode 100644 internal/chunking/manager_test.go delete mode 100644 internal/chunking/python/chunker.go delete mode 100644 internal/chunking/python/chunker_test.go delete mode 100644 internal/chunking/types.go delete mode 100644 internal/chunking/types_test.go delete mode 100644 internal/chunking/typescript/chunker.go delete mode 100644 internal/chunking/typescript/chunker_test.go diff --git a/go.mod b/go.mod index e9e08cd..10be35a 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/lukaszraczylo/oss-telemetry v0.2.3 github.com/mattn/go-sqlite3 v1.14.46 github.com/rs/zerolog v1.35.1 - github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 github.com/stretchr/testify v1.11.1 github.com/sugarme/tokenizer v0.3.0 github.com/yalue/onnxruntime_go v1.31.0 diff --git a/go.sum b/go.sum index 3085ef5..686ec77 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,6 @@ github.com/rs/zerolog v1.35.1 h1:m7xQeoiLIiV0BCEY4Hs+j2NG4Gp2o2KPKmhnnLiazKI= github.com/rs/zerolog v1.35.1/go.mod h1:EjML9kdfa/RMA7h/6z6pYmq1ykOuA8/mjWaEvGI+jcw= github.com/schollz/progressbar/v2 v2.15.0 h1:dVzHQ8fHRmtPjD3K10jT3Qgn/+H+92jhPrhmxIJfDz8= github.com/schollz/progressbar/v2 v2.15.0/go.mod h1:UdPq3prGkfQ7MOzZKlDRpYKcFqEMczbD7YmbPgpzKMI= -github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4= -github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= diff --git a/internal/chunking/golang/chunker.go b/internal/chunking/golang/chunker.go deleted file mode 100644 index c267cf5..0000000 --- a/internal/chunking/golang/chunker.go +++ /dev/null @@ -1,285 +0,0 @@ -// Package golang provides AST-aware chunking for Go source files. -package golang - -import ( - "context" - "fmt" - "go/ast" - "go/parser" - "go/token" - "os" - "strings" - - "github.com/lukaszraczylo/claude-mnemonic/internal/chunking" -) - -// Chunker implements AST-aware chunking for Go files. -type Chunker struct { - options chunking.ChunkOptions -} - -// NewChunker creates a new Go chunker. -func NewChunker(options chunking.ChunkOptions) *Chunker { - return &Chunker{options: options} -} - -// Language returns the language this chunker supports. -func (c *Chunker) Language() chunking.Language { - return chunking.LanguageGo -} - -// SupportedExtensions returns the file extensions this chunker handles. -func (c *Chunker) SupportedExtensions() []string { - return []string{".go"} -} - -// Chunk parses a Go source file and returns semantic code chunks. -func (c *Chunker) Chunk(ctx context.Context, filePath string) ([]chunking.Chunk, error) { - // Read file content - content, err := os.ReadFile(filePath) - if err != nil { - return nil, fmt.Errorf("read file: %w", err) - } - - // Parse the Go file - fset := token.NewFileSet() - file, err := parser.ParseFile(fset, filePath, content, parser.ParseComments) - if err != nil { - return nil, fmt.Errorf("parse Go file: %w", err) - } - - chunks := make([]chunking.Chunk, 0) - sourceLines := strings.Split(string(content), "\n") - - // Extract chunks from declarations - for _, decl := range file.Decls { - switch d := decl.(type) { - case *ast.FuncDecl: - chunk := c.extractFunction(fset, d, sourceLines, filePath) - if chunk != nil { - chunks = append(chunks, *chunk) - } - case *ast.GenDecl: - extracted := c.extractGenDecl(fset, d, sourceLines, filePath) - chunks = append(chunks, extracted...) - } - } - - return chunks, nil -} - -// extractFunction extracts a function or method declaration as a chunk. -func (c *Chunker) extractFunction(fset *token.FileSet, fn *ast.FuncDecl, sourceLines []string, filePath string) *chunking.Chunk { - // Skip unexported if configured - if !c.options.IncludePrivate && !fn.Name.IsExported() { - return nil - } - - startPos := fset.Position(fn.Pos()) - endPos := fset.Position(fn.End()) - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageGo, - Name: fn.Name.Name, - StartLine: startPos.Line, - EndLine: endPos.Line, - } - - // Determine if this is a method or a function - if fn.Recv != nil && len(fn.Recv.List) > 0 { - chunk.Type = chunking.ChunkTypeMethod - chunk.ParentName = c.extractReceiverType(fn.Recv) - } else { - chunk.Type = chunking.ChunkTypeFunction - } - - // Extract content - chunk.Content = c.extractLines(sourceLines, startPos.Line, endPos.Line) - - // Extract signature (function declaration without body) - chunk.Signature = c.extractFunctionSignature(fn, fset, sourceLines) - - // Extract doc comment - if c.options.IncludeDocComments && fn.Doc != nil { - chunk.DocComment = strings.TrimSpace(fn.Doc.Text()) - } - - return chunk -} - -// extractGenDecl extracts general declarations (type, const, var). -func (c *Chunker) extractGenDecl(fset *token.FileSet, gd *ast.GenDecl, sourceLines []string, filePath string) []chunking.Chunk { - var chunks []chunking.Chunk - - for _, spec := range gd.Specs { - switch s := spec.(type) { - case *ast.TypeSpec: - chunk := c.extractTypeSpec(fset, gd, s, sourceLines, filePath) - if chunk != nil { - chunks = append(chunks, *chunk) - } - case *ast.ValueSpec: - // Handle const and var declarations - chunk := c.extractValueSpec(fset, gd, s, sourceLines, filePath) - if chunk != nil { - chunks = append(chunks, *chunk) - } - } - } - - return chunks -} - -// extractTypeSpec extracts a type declaration (struct, interface, type alias). -func (c *Chunker) extractTypeSpec(fset *token.FileSet, gd *ast.GenDecl, ts *ast.TypeSpec, sourceLines []string, filePath string) *chunking.Chunk { - // Skip unexported if configured - if !c.options.IncludePrivate && !ts.Name.IsExported() { - return nil - } - - startPos := fset.Position(gd.Pos()) - endPos := fset.Position(gd.End()) - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageGo, - Name: ts.Name.Name, - StartLine: startPos.Line, - EndLine: endPos.Line, - Content: c.extractLines(sourceLines, startPos.Line, endPos.Line), - } - - // Determine chunk type based on type expression - switch ts.Type.(type) { - case *ast.StructType: - chunk.Type = chunking.ChunkTypeClass // Treat struct as class - case *ast.InterfaceType: - chunk.Type = chunking.ChunkTypeInterface - default: - chunk.Type = chunking.ChunkTypeType - } - - // Extract doc comment - if c.options.IncludeDocComments && gd.Doc != nil { - chunk.DocComment = strings.TrimSpace(gd.Doc.Text()) - } - - return chunk -} - -// extractValueSpec extracts const or var declarations. -func (c *Chunker) extractValueSpec(fset *token.FileSet, gd *ast.GenDecl, vs *ast.ValueSpec, sourceLines []string, filePath string) *chunking.Chunk { - // Skip if all names are unexported and we're excluding private - if !c.options.IncludePrivate { - allUnexported := true - for _, name := range vs.Names { - if name.IsExported() { - allUnexported = false - break - } - } - if allUnexported { - return nil - } - } - - startPos := fset.Position(gd.Pos()) - endPos := fset.Position(gd.End()) - - // Use first name as the chunk name, join multiple if present - names := make([]string, len(vs.Names)) - for i, name := range vs.Names { - names[i] = name.Name - } - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageGo, - Name: strings.Join(names, ", "), - StartLine: startPos.Line, - EndLine: endPos.Line, - Content: c.extractLines(sourceLines, startPos.Line, endPos.Line), - } - - // Set type based on token - if gd.Tok == token.CONST { - chunk.Type = chunking.ChunkTypeConst - } else { - chunk.Type = chunking.ChunkTypeVar - } - - // Extract doc comment - if c.options.IncludeDocComments && gd.Doc != nil { - chunk.DocComment = strings.TrimSpace(gd.Doc.Text()) - } - - return chunk -} - -// extractReceiverType extracts the receiver type name from a method. -func (c *Chunker) extractReceiverType(recv *ast.FieldList) string { - if len(recv.List) == 0 { - return "" - } - - field := recv.List[0] - switch t := field.Type.(type) { - case *ast.Ident: - return t.Name - case *ast.StarExpr: - if ident, ok := t.X.(*ast.Ident); ok { - return ident.Name - } - } - - return "" -} - -// extractFunctionSignature extracts the function signature without the body. -func (c *Chunker) extractFunctionSignature(fn *ast.FuncDecl, fset *token.FileSet, sourceLines []string) string { - if fn.Body == nil { - // No body, return entire declaration - startPos := fset.Position(fn.Pos()) - endPos := fset.Position(fn.End()) - return c.extractLines(sourceLines, startPos.Line, endPos.Line) - } - - // Extract from start of function to just before body - startPos := fset.Position(fn.Pos()) - bodyPos := fset.Position(fn.Body.Pos()) - - // If body is on the same line, extract just that line up to the opening brace - if startPos.Line == bodyPos.Line { - line := sourceLines[startPos.Line-1] - // Find the opening brace position - if idx := strings.Index(line[startPos.Column-1:], "{"); idx >= 0 { - return strings.TrimSpace(line[startPos.Column-1 : startPos.Column-1+idx]) - } - return strings.TrimSpace(line[startPos.Column-1:]) - } - - // Get lines from start to the line containing the opening brace - sig := c.extractLines(sourceLines, startPos.Line, bodyPos.Line) - // Remove the opening brace and anything after it - if idx := strings.Index(sig, "{"); idx >= 0 { - sig = sig[:idx] - } - return strings.TrimSpace(sig) -} - -// extractLines extracts a range of lines from source (1-indexed, inclusive). -func (c *Chunker) extractLines(lines []string, start, end int) string { - if start < 1 || end < start || start > len(lines) { - return "" - } - - // Adjust for 0-indexed array (start and end are 1-indexed) - startIdx := start - 1 - endIdx := end - if endIdx > len(lines) { - endIdx = len(lines) - } - - return strings.Join(lines[startIdx:endIdx], "\n") -} diff --git a/internal/chunking/golang/chunker_test.go b/internal/chunking/golang/chunker_test.go deleted file mode 100644 index f09adc9..0000000 --- a/internal/chunking/golang/chunker_test.go +++ /dev/null @@ -1,214 +0,0 @@ -package golang - -import ( - "context" - "os" - "path/filepath" - "testing" - - "github.com/lukaszraczylo/claude-mnemonic/internal/chunking" -) - -func TestGoChunker_BasicFunctions(t *testing.T) { - // Create temp test file - tmpDir := t.TempDir() - testFile := filepath.Join(tmpDir, "test.go") - - testCode := `package main - -import "fmt" - -// Greet prints a greeting message -func Greet(name string) { - fmt.Printf("Hello, %s!\n", name) -} - -// Add adds two numbers -func Add(a, b int) int { - return a + b -} - -// unexported function should be included by default -func helper() string { - return "helper" -} -` - - if err := os.WriteFile(testFile, []byte(testCode), 0600); err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - // Create chunker with default options - chunker := NewChunker(chunking.DefaultChunkOptions()) - - // Chunk the file - chunks, err := chunker.Chunk(context.Background(), testFile) - if err != nil { - t.Fatalf("Chunk() failed: %v", err) - } - - // Verify we got all functions - if len(chunks) != 3 { - t.Errorf("Expected 3 chunks (Greet, Add, helper), got %d", len(chunks)) - } - - // Verify chunk details - expectedNames := map[string]bool{ - "Greet": false, - "Add": false, - "helper": false, - } - - for _, chunk := range chunks { - if chunk.Type != chunking.ChunkTypeFunction { - t.Errorf("Expected chunk type 'function', got '%s'", chunk.Type) - } - - if chunk.Language != chunking.LanguageGo { - t.Errorf("Expected language 'go', got '%s'", chunk.Language) - } - - if _, ok := expectedNames[chunk.Name]; !ok { - t.Errorf("Unexpected function name: %s", chunk.Name) - } else { - expectedNames[chunk.Name] = true - } - - // Verify content is non-empty - if chunk.Content == "" { - t.Errorf("Chunk %s has empty content", chunk.Name) - } - - // Verify signature is present for functions - if chunk.Signature == "" { - t.Errorf("Chunk %s has empty signature", chunk.Name) - } - } - - // Verify all expected functions were found - for name, found := range expectedNames { - if !found { - t.Errorf("Expected function %s not found", name) - } - } -} - -func TestGoChunker_StructsAndMethods(t *testing.T) { - tmpDir := t.TempDir() - testFile := filepath.Join(tmpDir, "test.go") - - testCode := `package main - -// User represents a user -type User struct { - ID int - Name string -} - -// GetName returns the user's name -func (u *User) GetName() string { - return u.Name -} - -// SetName sets the user's name -func (u *User) SetName(name string) { - u.Name = name -} -` - - if err := os.WriteFile(testFile, []byte(testCode), 0600); err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - chunker := NewChunker(chunking.DefaultChunkOptions()) - chunks, err := chunker.Chunk(context.Background(), testFile) - if err != nil { - t.Fatalf("Chunk() failed: %v", err) - } - - // Should have 1 struct + 2 methods = 3 chunks - if len(chunks) != 3 { - t.Errorf("Expected 3 chunks (User struct, GetName, SetName), got %d", len(chunks)) - } - - // Find the struct and methods - var structChunk, getNameChunk, setNameChunk *chunking.Chunk - for i := range chunks { - switch chunks[i].Name { - case "User": - structChunk = &chunks[i] - case "GetName": - getNameChunk = &chunks[i] - case "SetName": - setNameChunk = &chunks[i] - } - } - - // Verify struct - if structChunk == nil { - t.Fatal("User struct not found") - } - if structChunk.Type != chunking.ChunkTypeClass { - t.Errorf("Expected User to be ChunkTypeClass, got %s", structChunk.Type) - } - - // Verify methods - if getNameChunk == nil { - t.Fatal("GetName method not found") - } - if getNameChunk.Type != chunking.ChunkTypeMethod { - t.Errorf("Expected GetName to be ChunkTypeMethod, got %s", getNameChunk.Type) - } - if getNameChunk.ParentName != "User" { - t.Errorf("Expected GetName parent to be 'User', got '%s'", getNameChunk.ParentName) - } - - if setNameChunk == nil { - t.Fatal("SetName method not found") - } - if setNameChunk.Type != chunking.ChunkTypeMethod { - t.Errorf("Expected SetName to be ChunkTypeMethod, got %s", setNameChunk.Type) - } - if setNameChunk.ParentName != "User" { - t.Errorf("Expected SetName parent to be 'User', got '%s'", setNameChunk.ParentName) - } -} - -func TestGoChunker_DocComments(t *testing.T) { - tmpDir := t.TempDir() - testFile := filepath.Join(tmpDir, "test.go") - - testCode := `package main - -// Calculate performs a calculation. -// It takes two integers and returns their sum. -func Calculate(a, b int) int { - return a + b -} -` - - if err := os.WriteFile(testFile, []byte(testCode), 0600); err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - chunker := NewChunker(chunking.DefaultChunkOptions()) - chunks, err := chunker.Chunk(context.Background(), testFile) - if err != nil { - t.Fatalf("Chunk() failed: %v", err) - } - - if len(chunks) != 1 { - t.Fatalf("Expected 1 chunk, got %d", len(chunks)) - } - - chunk := chunks[0] - if chunk.DocComment == "" { - t.Error("Expected doc comment to be present") - } - - // Doc comment should contain the comment text - expectedComment := "Calculate performs a calculation.\nIt takes two integers and returns their sum." - if chunk.DocComment != expectedComment { - t.Errorf("Expected doc comment '%s', got '%s'", expectedComment, chunk.DocComment) - } -} diff --git a/internal/chunking/manager.go b/internal/chunking/manager.go deleted file mode 100644 index 1115a54..0000000 --- a/internal/chunking/manager.go +++ /dev/null @@ -1,106 +0,0 @@ -package chunking - -import ( - "context" - "fmt" - "path/filepath" - "strings" -) - -// Manager dispatches files to appropriate language-specific chunkers. -type Manager struct { - chunkers map[string]Chunker // extension -> chunker - options ChunkOptions -} - -// NewManager creates a new chunking manager with the given chunkers. -func NewManager(chunkers []Chunker, options ChunkOptions) *Manager { - m := &Manager{ - chunkers: make(map[string]Chunker), - options: options, - } - - // Register chunkers by their supported extensions - for _, chunker := range chunkers { - for _, ext := range chunker.SupportedExtensions() { - m.chunkers[ext] = chunker - } - } - - return m -} - -// ChunkFile chunks a single file using the appropriate language chunker. -// Returns an error if no chunker is found for the file extension. -func (m *Manager) ChunkFile(ctx context.Context, filePath string) ([]Chunk, error) { - ext := strings.ToLower(filepath.Ext(filePath)) - chunker, ok := m.chunkers[ext] - if !ok { - return nil, fmt.Errorf("no chunker for extension %s", ext) - } - - chunks, err := chunker.Chunk(ctx, filePath) - if err != nil { - return nil, fmt.Errorf("chunk %s: %w", filePath, err) - } - - // Apply options-based filtering - filtered := make([]Chunk, 0, len(chunks)) - for _, chunk := range chunks { - // Filter by minimum lines - if m.options.MinLines > 0 { - lineCount := chunk.EndLine - chunk.StartLine + 1 - if lineCount < m.options.MinLines { - continue - } - } - - // Filter by maximum chunk size - if m.options.MaxChunkSize > 0 && len(chunk.Content) > m.options.MaxChunkSize { - // TODO: Consider splitting large chunks intelligently - // For now, skip chunks that are too large - continue - } - - filtered = append(filtered, chunk) - } - - return filtered, nil -} - -// ChunkFiles chunks multiple files in parallel. -// Returns a map of file path to chunks, and any errors encountered. -// Errors for individual files do not stop processing of other files. -func (m *Manager) ChunkFiles(ctx context.Context, filePaths []string) (map[string][]Chunk, []error) { - results := make(map[string][]Chunk) - var errors []error - - for _, filePath := range filePaths { - chunks, err := m.ChunkFile(ctx, filePath) - if err != nil { - errors = append(errors, fmt.Errorf("%s: %w", filePath, err)) - continue - } - if len(chunks) > 0 { - results[filePath] = chunks - } - } - - return results, errors -} - -// SupportsFile checks if the manager can chunk the given file based on extension. -func (m *Manager) SupportsFile(filePath string) bool { - ext := strings.ToLower(filepath.Ext(filePath)) - _, ok := m.chunkers[ext] - return ok -} - -// SupportedExtensions returns all file extensions supported by registered chunkers. -func (m *Manager) SupportedExtensions() []string { - exts := make([]string, 0, len(m.chunkers)) - for ext := range m.chunkers { - exts = append(exts, ext) - } - return exts -} diff --git a/internal/chunking/manager_test.go b/internal/chunking/manager_test.go deleted file mode 100644 index 6c8e867..0000000 --- a/internal/chunking/manager_test.go +++ /dev/null @@ -1,162 +0,0 @@ -package chunking - -import ( - "context" - "os" - "path/filepath" - "testing" -) - -// mockChunker is a test chunker that returns dummy chunks -type mockChunker struct{} - -func (m *mockChunker) Chunk(ctx context.Context, filePath string) ([]Chunk, error) { - // Just return an empty chunk for testing - return []Chunk{ - { - FilePath: filePath, - Language: LanguageGo, - Type: ChunkTypeFunction, - Name: "TestFunc", - StartLine: 1, - EndLine: 1, - Content: "test", - }, - }, nil -} - -func (m *mockChunker) Language() Language { - return LanguageGo -} - -func (m *mockChunker) SupportedExtensions() []string { - return []string{".go", ".py", ".ts"} -} - -func TestManager_ChunkMultipleFiles(t *testing.T) { - tmpDir := t.TempDir() - - // Create a Go file - goFile := filepath.Join(tmpDir, "test.go") - goCode := `package main - -func Hello() string { - return "hello" -} -` - if err := os.WriteFile(goFile, []byte(goCode), 0600); err != nil { - t.Fatalf("Failed to create Go file: %v", err) - } - - // Create a Python file - pyFile := filepath.Join(tmpDir, "test.py") - pyCode := `def greet(name): - return f"Hello, {name}!" - -class User: - def __init__(self, name): - self.name = name -` - if err := os.WriteFile(pyFile, []byte(pyCode), 0600); err != nil { - t.Fatalf("Failed to create Python file: %v", err) - } - - // Create a TypeScript file - tsFile := filepath.Join(tmpDir, "test.ts") - tsCode := `function add(a: number, b: number): number { - return a + b; -} - -class Calculator { - multiply(a: number, b: number): number { - return a * b; - } -} -` - if err := os.WriteFile(tsFile, []byte(tsCode), 0600); err != nil { - t.Fatalf("Failed to create TypeScript file: %v", err) - } - - // Create manager - manager := NewManager([]Chunker{&mockChunker{}}, DefaultChunkOptions()) - - // Test SupportsFile - if !manager.SupportsFile(goFile) { - t.Error("Manager should support .go files") - } - if !manager.SupportsFile(pyFile) { - t.Error("Manager should support .py files") - } - if !manager.SupportsFile(tsFile) { - t.Error("Manager should support .ts files") - } - - unsupportedFile := filepath.Join(tmpDir, "test.txt") - if manager.SupportsFile(unsupportedFile) { - t.Error("Manager should not support .txt files") - } - - // Test ChunkFiles - results, errs := manager.ChunkFiles(context.Background(), []string{goFile, pyFile, tsFile}) - if len(errs) > 0 { - t.Errorf("ChunkFiles returned errors: %v", errs) - } - - if len(results) != 3 { - t.Errorf("Expected results for 3 files, got %d", len(results)) - } - - // Verify each file has chunks - for _, file := range []string{goFile, pyFile, tsFile} { - if chunks, ok := results[file]; !ok || len(chunks) == 0 { - t.Errorf("No chunks found for file %s", file) - } - } -} - -// mockChunkerWithExts is a test chunker with configurable extensions -type mockChunkerWithExts struct { - exts []string -} - -func (m *mockChunkerWithExts) Chunk(ctx context.Context, filePath string) ([]Chunk, error) { - return nil, nil -} - -func (m *mockChunkerWithExts) Language() Language { - return LanguageGo -} - -func (m *mockChunkerWithExts) SupportedExtensions() []string { - return m.exts -} - -func TestManager_SupportedExtensions(t *testing.T) { - - // Create manager with mock chunkers - manager := NewManager([]Chunker{ - &mockChunkerWithExts{exts: []string{".go"}}, - &mockChunkerWithExts{exts: []string{".py", ".pyw"}}, - }, DefaultChunkOptions()) - - exts := manager.SupportedExtensions() - expectedExts := map[string]bool{ - ".go": false, - ".py": false, - ".pyw": false, - } - - for _, ext := range exts { - if _, ok := expectedExts[ext]; ok { - expectedExts[ext] = true - } else { - t.Errorf("Unexpected extension: %s", ext) - } - } - - for ext, found := range expectedExts { - if !found { - t.Errorf("Expected extension %s not found", ext) - } - } -} diff --git a/internal/chunking/python/chunker.go b/internal/chunking/python/chunker.go deleted file mode 100644 index c4906f9..0000000 --- a/internal/chunking/python/chunker.go +++ /dev/null @@ -1,291 +0,0 @@ -// Package python provides AST-aware chunking for Python source files using tree-sitter. -package python - -import ( - "context" - "fmt" - "os" - "strings" - - sitter "github.com/smacker/go-tree-sitter" - "github.com/smacker/go-tree-sitter/python" - - "github.com/lukaszraczylo/claude-mnemonic/internal/chunking" -) - -// Chunker implements AST-aware chunking for Python files. -type Chunker struct { - parser *sitter.Parser - options chunking.ChunkOptions -} - -// NewChunker creates a new Python chunker. -func NewChunker(options chunking.ChunkOptions) *Chunker { - parser := sitter.NewParser() - parser.SetLanguage(python.GetLanguage()) - - return &Chunker{ - options: options, - parser: parser, - } -} - -// Language returns the language this chunker supports. -func (c *Chunker) Language() chunking.Language { - return chunking.LanguagePython -} - -// SupportedExtensions returns the file extensions this chunker handles. -func (c *Chunker) SupportedExtensions() []string { - return []string{".py"} -} - -// Chunk parses a Python source file and returns semantic code chunks. -func (c *Chunker) Chunk(ctx context.Context, filePath string) ([]chunking.Chunk, error) { - // Read file content - content, err := os.ReadFile(filePath) - if err != nil { - return nil, fmt.Errorf("read file: %w", err) - } - - // Parse the Python file - tree, err := c.parser.ParseCtx(ctx, nil, content) - if err != nil { - return nil, fmt.Errorf("parse Python file: %w", err) - } - defer tree.Close() - - sourceLines := strings.Split(string(content), "\n") - chunks := make([]chunking.Chunk, 0) - - // Walk the AST and extract chunks - c.walkNode(tree.RootNode(), content, sourceLines, filePath, "", &chunks) - - return chunks, nil -} - -// walkNode recursively walks the tree-sitter AST and extracts chunks. -func (c *Chunker) walkNode(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string, chunks *[]chunking.Chunk) { - nodeType := node.Type() - - switch nodeType { - case "function_definition": - chunk := c.extractFunction(node, source, sourceLines, filePath, parentName) - if chunk != nil { - *chunks = append(*chunks, *chunk) - } - - case "class_definition": - chunk := c.extractClass(node, source, sourceLines, filePath) - if chunk != nil { - *chunks = append(*chunks, *chunk) - - // Walk class body to find methods - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "block" { - c.walkNode(child, source, sourceLines, filePath, chunk.Name, chunks) - } - } - } - return // Don't walk children again - - case "block": - // Walk statements in block - for i := 0; i < int(node.ChildCount()); i++ { - c.walkNode(node.Child(i), source, sourceLines, filePath, parentName, chunks) - } - return - } - - // Walk all children - for i := 0; i < int(node.ChildCount()); i++ { - c.walkNode(node.Child(i), source, sourceLines, filePath, parentName, chunks) - } -} - -// extractFunction extracts a function definition chunk. -func (c *Chunker) extractFunction(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk { - // Find function name - var nameNode *sitter.Node - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "identifier" { - nameNode = child - break - } - } - - if nameNode == nil { - return nil - } - - name := nameNode.Content(source) - - // Skip private functions if configured - if !c.options.IncludePrivate && strings.HasPrefix(name, "_") && !strings.HasPrefix(name, "__") { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguagePython, - Name: name, - ParentName: parentName, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - } - - // Determine if this is a method or function - if parentName != "" { - chunk.Type = chunking.ChunkTypeMethod - } else { - chunk.Type = chunking.ChunkTypeFunction - } - - // Extract signature (def line) - chunk.Signature = c.extractFunctionSignature(node, source, sourceLines) - - // Extract docstring as doc comment - if c.options.IncludeDocComments { - chunk.DocComment = c.extractDocstring(node, source) - } - - return chunk -} - -// extractClass extracts a class definition chunk. -func (c *Chunker) extractClass(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk { - // Find class name - var nameNode *sitter.Node - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "identifier" { - nameNode = child - break - } - } - - if nameNode == nil { - return nil - } - - name := nameNode.Content(source) - - // Skip private classes if configured - if !c.options.IncludePrivate && strings.HasPrefix(name, "_") && !strings.HasPrefix(name, "__") { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguagePython, - Type: chunking.ChunkTypeClass, - Name: name, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - } - - // Extract class signature (class line) - chunk.Signature = c.extractClassSignature(node, source, sourceLines) - - // Extract docstring as doc comment - if c.options.IncludeDocComments { - chunk.DocComment = c.extractDocstring(node, source) - } - - return chunk -} - -// extractFunctionSignature extracts the function definition line. -func (c *Chunker) extractFunctionSignature(node *sitter.Node, source []byte, sourceLines []string) string { - startLine := int(node.StartPoint().Row) + 1 - - // Find the colon that ends the signature - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == ":" { - endLine := int(child.EndPoint().Row) + 1 - return strings.TrimSpace(c.extractLines(sourceLines, startLine, endLine)) - } - } - - // Fallback: just return first line - return strings.TrimSpace(c.extractLines(sourceLines, startLine, startLine)) -} - -// extractClassSignature extracts the class definition line. -func (c *Chunker) extractClassSignature(node *sitter.Node, source []byte, sourceLines []string) string { - startLine := int(node.StartPoint().Row) + 1 - - // Find the colon that ends the signature - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == ":" { - endLine := int(child.EndPoint().Row) + 1 - return strings.TrimSpace(c.extractLines(sourceLines, startLine, endLine)) - } - } - - // Fallback: just return first line - return strings.TrimSpace(c.extractLines(sourceLines, startLine, startLine)) -} - -// extractDocstring extracts the docstring from a function or class. -func (c *Chunker) extractDocstring(node *sitter.Node, source []byte) string { - // Find the block - var blockNode *sitter.Node - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "block" { - blockNode = child - break - } - } - - if blockNode == nil { - return "" - } - - // Check if first statement in block is a string (docstring) - for i := 0; i < int(blockNode.ChildCount()); i++ { - child := blockNode.Child(i) - if child.Type() == "expression_statement" { - // Check if it contains a string - for j := 0; j < int(child.ChildCount()); j++ { - grandchild := child.Child(j) - if grandchild.Type() == "string" { - docstring := grandchild.Content(source) - // Remove quotes - docstring = strings.Trim(docstring, `"'`) - return strings.TrimSpace(docstring) - } - } - } - } - - return "" -} - -// extractLines extracts a range of lines from source (1-indexed, inclusive). -func (c *Chunker) extractLines(lines []string, start, end int) string { - if start < 1 || end < start || start > len(lines) { - return "" - } - - startIdx := start - 1 - endIdx := end - if endIdx > len(lines) { - endIdx = len(lines) - } - - return strings.Join(lines[startIdx:endIdx], "\n") -} diff --git a/internal/chunking/python/chunker_test.go b/internal/chunking/python/chunker_test.go deleted file mode 100644 index b68e465..0000000 --- a/internal/chunking/python/chunker_test.go +++ /dev/null @@ -1,298 +0,0 @@ -package python - -import ( - "context" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/lukaszraczylo/claude-mnemonic/internal/chunking" -) - -// ============================================================================= -// TEST HELPERS -// ============================================================================= - -func createTempPythonFile(t *testing.T, content string) string { - t.Helper() - - tmpDir := t.TempDir() - filePath := filepath.Join(tmpDir, "test.py") - - err := os.WriteFile(filePath, []byte(content), 0600) - require.NoError(t, err) - - return filePath -} - -// ============================================================================= -// TESTS FOR Chunker -// ============================================================================= - -func TestNewChunker(t *testing.T) { - t.Parallel() - - opts := chunking.DefaultChunkOptions() - c := NewChunker(opts) - - assert.NotNil(t, c) - assert.NotNil(t, c.parser) -} - -func TestChunker_Language(t *testing.T) { - t.Parallel() - - c := NewChunker(chunking.DefaultChunkOptions()) - - assert.Equal(t, chunking.LanguagePython, c.Language()) -} - -func TestChunker_SupportedExtensions(t *testing.T) { - t.Parallel() - - c := NewChunker(chunking.DefaultChunkOptions()) - exts := c.SupportedExtensions() - - assert.Contains(t, exts, ".py") -} - -func TestChunker_Chunk_SimpleFunction(t *testing.T) { - t.Parallel() - - code := `def greet(name): - """Greets a person by name.""" - return f"Hello, {name}!" -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find the greet function - var foundGreet bool - for _, chunk := range chunks { - if chunk.Name == "greet" { - foundGreet = true - assert.Equal(t, chunking.ChunkTypeFunction, chunk.Type) - assert.Equal(t, chunking.LanguagePython, chunk.Language) - assert.Contains(t, chunk.Content, "def greet") - } - } - assert.True(t, foundGreet, "Should find 'greet' function") -} - -func TestChunker_Chunk_ClassWithMethods(t *testing.T) { - t.Parallel() - - code := `class Calculator: - """A simple calculator class.""" - - def add(self, a, b): - """Adds two numbers.""" - return a + b - - def multiply(self, a, b): - """Multiplies two numbers.""" - return a * b -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find the Calculator class and its methods - var foundClass, foundAdd, foundMultiply bool - for _, chunk := range chunks { - switch chunk.Name { - case "Calculator": - foundClass = true - assert.Equal(t, chunking.ChunkTypeClass, chunk.Type) - case "add": - foundAdd = true - assert.Equal(t, chunking.ChunkTypeMethod, chunk.Type) - assert.Equal(t, "Calculator", chunk.ParentName) - case "multiply": - foundMultiply = true - assert.Equal(t, chunking.ChunkTypeMethod, chunk.Type) - assert.Equal(t, "Calculator", chunk.ParentName) - } - } - - assert.True(t, foundClass, "Should find 'Calculator' class") - assert.True(t, foundAdd, "Should find 'add' method") - assert.True(t, foundMultiply, "Should find 'multiply' method") -} - -func TestChunker_Chunk_MultipleFunctions(t *testing.T) { - t.Parallel() - - code := `def first_function(): - pass - -def second_function(x, y): - return x + y - -def third_function(): - """Has a docstring.""" - return 42 -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - - // Should find all three functions - functionNames := make(map[string]bool) - for _, chunk := range chunks { - if chunk.Type == chunking.ChunkTypeFunction { - functionNames[chunk.Name] = true - } - } - - assert.True(t, functionNames["first_function"]) - assert.True(t, functionNames["second_function"]) - assert.True(t, functionNames["third_function"]) -} - -func TestChunker_Chunk_FileNotFound(t *testing.T) { - t.Parallel() - - c := NewChunker(chunking.DefaultChunkOptions()) - - _, err := c.Chunk(context.Background(), "/nonexistent/path/file.py") - require.Error(t, err) - assert.Contains(t, err.Error(), "read file") -} - -func TestChunker_Chunk_EmptyFile(t *testing.T) { - t.Parallel() - - filePath := createTempPythonFile(t, "") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - assert.Empty(t, chunks) -} - -func TestChunker_Chunk_OnlyComments(t *testing.T) { - t.Parallel() - - code := `# This is a comment -# Another comment -""" -This is a module docstring -""" -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - // Comments and docstrings without code should not produce chunks - assert.Empty(t, chunks) -} - -func TestChunker_Chunk_NestedClass(t *testing.T) { - t.Parallel() - - code := `class Outer: - class Inner: - def inner_method(self): - pass - - def outer_method(self): - pass -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find the Outer class at minimum - var foundOuter bool - for _, chunk := range chunks { - if chunk.Name == "Outer" { - foundOuter = true - } - } - assert.True(t, foundOuter, "Should find 'Outer' class") -} - -func TestChunker_Chunk_Decorators(t *testing.T) { - t.Parallel() - - code := `@staticmethod -def static_func(): - pass - -@classmethod -def class_func(cls): - pass - -@property -def my_property(self): - return self._value -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find decorated functions - functionNames := make(map[string]bool) - for _, chunk := range chunks { - functionNames[chunk.Name] = true - } - - assert.True(t, functionNames["static_func"]) - assert.True(t, functionNames["class_func"]) - assert.True(t, functionNames["my_property"]) -} - -func TestChunker_Chunk_AsyncFunction(t *testing.T) { - t.Parallel() - - code := `async def fetch_data(url): - """Fetches data from URL asynchronously.""" - pass - -async def process_items(items): - for item in items: - await process(item) -` - - filePath := createTempPythonFile(t, code) - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find async functions - functionNames := make(map[string]bool) - for _, chunk := range chunks { - functionNames[chunk.Name] = true - } - - assert.True(t, functionNames["fetch_data"]) - assert.True(t, functionNames["process_items"]) -} diff --git a/internal/chunking/types.go b/internal/chunking/types.go deleted file mode 100644 index ce639b1..0000000 --- a/internal/chunking/types.go +++ /dev/null @@ -1,140 +0,0 @@ -// Package chunking provides AST-aware code chunking for semantic code search. -// Chunks code files into logical units (functions, classes, methods) that preserve -// semantic boundaries for better vector embedding and retrieval. -package chunking - -import ( - "context" - "fmt" - "strings" -) - -// ChunkType represents the type of code chunk. -type ChunkType string - -const ( - // ChunkTypeFunction represents a standalone function. - ChunkTypeFunction ChunkType = "function" - // ChunkTypeMethod represents a method on a class/struct/type. - ChunkTypeMethod ChunkType = "method" - // ChunkTypeClass represents a class or struct definition. - ChunkTypeClass ChunkType = "class" - // ChunkTypeInterface represents an interface definition. - ChunkTypeInterface ChunkType = "interface" - // ChunkTypeType represents a type alias or type definition. - ChunkTypeType ChunkType = "type" - // ChunkTypeConst represents constant declarations. - ChunkTypeConst ChunkType = "const" - // ChunkTypeVar represents variable declarations. - ChunkTypeVar ChunkType = "var" -) - -// Language represents a programming language. -type Language string - -const ( - // LanguageGo represents the Go programming language. - LanguageGo Language = "go" - // LanguagePython represents the Python programming language. - LanguagePython Language = "python" - // LanguageTypeScript represents the TypeScript programming language. - LanguageTypeScript Language = "typescript" - // LanguageJavaScript represents the JavaScript programming language. - LanguageJavaScript Language = "javascript" -) - -// Chunk represents a semantic code chunk with AST-derived boundaries. -type Chunk struct { - Metadata map[string]interface{} - FilePath string - Language Language - Type ChunkType - Name string - ParentName string - Content string - Signature string - DocComment string - StartLine int - EndLine int -} - -// Identifier returns a human-readable identifier for this chunk. -// Format: "ParentName.Name" for methods, "Name" for top-level. -func (c *Chunk) Identifier() string { - if c.ParentName != "" { - return fmt.Sprintf("%s.%s", c.ParentName, c.Name) - } - return c.Name -} - -// LineRange returns a human-readable line range. -// Format: "L123-L456" -func (c *Chunk) LineRange() string { - return fmt.Sprintf("L%d-L%d", c.StartLine, c.EndLine) -} - -// SearchableContent returns content optimized for semantic search. -// Combines signature, doc comment, and content in a structured format. -func (c *Chunk) SearchableContent() string { - var parts []string - - // Include signature for functions/methods - if c.Signature != "" { - parts = append(parts, c.Signature) - } - - // Include doc comment - if c.DocComment != "" { - parts = append(parts, c.DocComment) - } - - // Include actual content - if c.Content != "" { - parts = append(parts, c.Content) - } - - return strings.Join(parts, "\n\n") -} - -// Chunker is the interface for language-specific code chunkers. -type Chunker interface { - // Chunk parses a source file and returns semantic code chunks. - // Returns an error if the file cannot be parsed or read. - Chunk(ctx context.Context, filePath string) ([]Chunk, error) - - // Language returns the language this chunker supports. - Language() Language - - // SupportedExtensions returns file extensions this chunker handles. - // Example: []string{".go"} for Go chunker - SupportedExtensions() []string -} - -// ChunkOptions provides options for chunking behavior. -type ChunkOptions struct { - // MaxChunkSize is the maximum size of a chunk in bytes. - // Chunks larger than this will be split (respecting boundaries where possible). - // 0 means no limit. - MaxChunkSize int - - // IncludeDocComments controls whether to include documentation comments. - IncludeDocComments bool - - // IncludePrivate controls whether to include private/unexported symbols. - IncludePrivate bool - - // MinLines is the minimum number of lines for a chunk to be included. - // Chunks smaller than this will be skipped. - // 0 means no minimum. - MinLines int -} - -// DefaultChunkOptions returns sensible default options. -func DefaultChunkOptions() ChunkOptions { - return ChunkOptions{ - MaxChunkSize: 8192, // ~8KB per chunk (well under token limit) - IncludeDocComments: true, - IncludePrivate: true, // Include all symbols for comprehensive search - MinLines: 0, // No minimum - include even single-line functions - } -} diff --git a/internal/chunking/types_test.go b/internal/chunking/types_test.go deleted file mode 100644 index 81cff09..0000000 --- a/internal/chunking/types_test.go +++ /dev/null @@ -1,213 +0,0 @@ -package chunking - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -// ============================================================================= -// TESTS FOR Chunk METHODS -// ============================================================================= - -func TestChunk_Identifier(t *testing.T) { - tests := []struct { - name string - expected string - chunk Chunk - }{ - // ===== GOOD CASES ===== - { - name: "top-level function", - chunk: Chunk{ - Name: "MyFunction", - ParentName: "", - }, - expected: "MyFunction", - }, - { - name: "method with parent", - chunk: Chunk{ - Name: "Process", - ParentName: "Handler", - }, - expected: "Handler.Process", - }, - { - name: "nested method", - chunk: Chunk{ - Name: "Validate", - ParentName: "UserService", - }, - expected: "UserService.Validate", - }, - - // ===== EDGE CASES ===== - { - name: "empty name", - chunk: Chunk{ - Name: "", - ParentName: "", - }, - expected: "", - }, - { - name: "parent but no name", - chunk: Chunk{ - Name: "", - ParentName: "Parent", - }, - expected: "Parent.", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := tt.chunk.Identifier() - assert.Equal(t, tt.expected, result) - }) - } -} - -func TestChunk_LineRange(t *testing.T) { - tests := []struct { - name string - expected string - chunk Chunk - }{ - // ===== GOOD CASES ===== - { - name: "single line", - chunk: Chunk{ - StartLine: 10, - EndLine: 10, - }, - expected: "L10-L10", - }, - { - name: "multi-line", - chunk: Chunk{ - StartLine: 25, - EndLine: 50, - }, - expected: "L25-L50", - }, - - // ===== EDGE CASES ===== - { - name: "line 1", - chunk: Chunk{ - StartLine: 1, - EndLine: 5, - }, - expected: "L1-L5", - }, - { - name: "large line numbers", - chunk: Chunk{ - StartLine: 1000, - EndLine: 2500, - }, - expected: "L1000-L2500", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := tt.chunk.LineRange() - assert.Equal(t, tt.expected, result) - }) - } -} - -func TestChunk_SearchableContent(t *testing.T) { - tests := []struct { - name string - contains []string - chunk Chunk - }{ - // ===== GOOD CASES ===== - { - name: "full chunk with all fields", - chunk: Chunk{ - Signature: "func ProcessData(input []byte) error", - DocComment: "// ProcessData handles incoming data", - Content: "func ProcessData(input []byte) error {\n\treturn nil\n}", - }, - contains: []string{ - "func ProcessData(input []byte) error", - "ProcessData handles incoming data", - "return nil", - }, - }, - { - name: "only signature", - chunk: Chunk{ - Signature: "func Hello()", - }, - contains: []string{"func Hello()"}, - }, - { - name: "only content", - chunk: Chunk{ - Content: "some code here", - }, - contains: []string{"some code here"}, - }, - - // ===== EDGE CASES ===== - { - name: "empty chunk", - chunk: Chunk{}, - contains: []string{}, - }, - { - name: "only doc comment", - chunk: Chunk{ - DocComment: "// Important documentation", - }, - contains: []string{"Important documentation"}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := tt.chunk.SearchableContent() - for _, expected := range tt.contains { - assert.Contains(t, result, expected) - } - }) - } -} - -func TestDefaultChunkOptions(t *testing.T) { - opts := DefaultChunkOptions() - - assert.Greater(t, opts.MaxChunkSize, 0, "MaxChunkSize should be positive") - assert.True(t, opts.IncludeDocComments, "IncludeDocComments should be true by default") - assert.True(t, opts.IncludePrivate, "IncludePrivate should be true by default") - assert.Equal(t, 0, opts.MinLines, "MinLines should be 0 by default") -} - -// ============================================================================= -// TESTS FOR ChunkType AND Language CONSTANTS -// ============================================================================= - -func TestChunkType_Values(t *testing.T) { - // Ensure all chunk types have expected values - assert.Equal(t, ChunkType("function"), ChunkTypeFunction) - assert.Equal(t, ChunkType("method"), ChunkTypeMethod) - assert.Equal(t, ChunkType("class"), ChunkTypeClass) - assert.Equal(t, ChunkType("interface"), ChunkTypeInterface) - assert.Equal(t, ChunkType("type"), ChunkTypeType) - assert.Equal(t, ChunkType("const"), ChunkTypeConst) - assert.Equal(t, ChunkType("var"), ChunkTypeVar) -} - -func TestLanguage_Values(t *testing.T) { - // Ensure all language types have expected values - assert.Equal(t, Language("go"), LanguageGo) - assert.Equal(t, Language("python"), LanguagePython) - assert.Equal(t, Language("typescript"), LanguageTypeScript) - assert.Equal(t, Language("javascript"), LanguageJavaScript) -} diff --git a/internal/chunking/typescript/chunker.go b/internal/chunking/typescript/chunker.go deleted file mode 100644 index 44029cd..0000000 --- a/internal/chunking/typescript/chunker.go +++ /dev/null @@ -1,403 +0,0 @@ -// Package typescript provides AST-aware chunking for TypeScript and JavaScript source files using tree-sitter. -package typescript - -import ( - "context" - "fmt" - "os" - "strings" - - sitter "github.com/smacker/go-tree-sitter" - "github.com/smacker/go-tree-sitter/typescript/typescript" - - "github.com/lukaszraczylo/claude-mnemonic/internal/chunking" -) - -// Chunker implements AST-aware chunking for TypeScript/JavaScript files. -type Chunker struct { - parser *sitter.Parser - options chunking.ChunkOptions -} - -// NewChunker creates a new TypeScript chunker. -func NewChunker(options chunking.ChunkOptions) *Chunker { - parser := sitter.NewParser() - parser.SetLanguage(typescript.GetLanguage()) - - return &Chunker{ - options: options, - parser: parser, - } -} - -// Language returns the language this chunker supports. -func (c *Chunker) Language() chunking.Language { - return chunking.LanguageTypeScript -} - -// SupportedExtensions returns the file extensions this chunker handles. -func (c *Chunker) SupportedExtensions() []string { - return []string{".ts", ".tsx", ".js", ".jsx"} -} - -// Chunk parses a TypeScript/JavaScript source file and returns semantic code chunks. -func (c *Chunker) Chunk(ctx context.Context, filePath string) ([]chunking.Chunk, error) { - // Read file content - content, err := os.ReadFile(filePath) - if err != nil { - return nil, fmt.Errorf("read file: %w", err) - } - - // Parse the file - tree, err := c.parser.ParseCtx(ctx, nil, content) - if err != nil { - return nil, fmt.Errorf("parse TypeScript file: %w", err) - } - defer tree.Close() - - sourceLines := strings.Split(string(content), "\n") - chunks := make([]chunking.Chunk, 0) - - // Walk the AST and extract chunks - c.walkNode(tree.RootNode(), content, sourceLines, filePath, "", &chunks) - - return chunks, nil -} - -// walkNode recursively walks the tree-sitter AST and extracts chunks. -func (c *Chunker) walkNode(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string, chunks *[]chunking.Chunk) { - nodeType := node.Type() - - switch nodeType { - case "function_declaration": - chunk := c.extractFunction(node, source, sourceLines, filePath, parentName) - if chunk != nil { - *chunks = append(*chunks, *chunk) - } - - case "method_definition": - chunk := c.extractMethod(node, source, sourceLines, filePath, parentName) - if chunk != nil { - *chunks = append(*chunks, *chunk) - } - - case "arrow_function", "function_expression": - // Handle arrow functions and function expressions assigned to variables - chunk := c.extractFunctionExpression(node, source, sourceLines, filePath, parentName) - if chunk != nil { - *chunks = append(*chunks, *chunk) - } - - case "class_declaration": - chunk := c.extractClass(node, source, sourceLines, filePath) - if chunk != nil { - *chunks = append(*chunks, *chunk) - - // Walk class body to find methods - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "class_body" { - c.walkNode(child, source, sourceLines, filePath, chunk.Name, chunks) - } - } - } - return // Don't walk children again - - case "interface_declaration": - chunk := c.extractInterface(node, source, sourceLines, filePath) - if chunk != nil { - *chunks = append(*chunks, *chunk) - } - - case "type_alias_declaration": - chunk := c.extractTypeAlias(node, source, sourceLines, filePath) - if chunk != nil { - *chunks = append(*chunks, *chunk) - } - } - - // Walk all children - for i := 0; i < int(node.ChildCount()); i++ { - c.walkNode(node.Child(i), source, sourceLines, filePath, parentName, chunks) - } -} - -// extractFunction extracts a function declaration. -func (c *Chunker) extractFunction(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk { - name := c.findChildContent(node, "identifier", source) - if name == "" { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageTypeScript, - Type: chunking.ChunkTypeFunction, - Name: name, - ParentName: parentName, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - Signature: c.extractFunctionSignature(node, source, sourceLines), - } - - // Extract JSDoc comment - if c.options.IncludeDocComments { - chunk.DocComment = c.extractComment(node, source) - } - - return chunk -} - -// extractMethod extracts a method definition from a class. -func (c *Chunker) extractMethod(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk { - name := c.findChildContent(node, "property_identifier", source) - if name == "" { - return nil - } - - // Skip private methods if configured - if !c.options.IncludePrivate && strings.HasPrefix(name, "_") { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageTypeScript, - Type: chunking.ChunkTypeMethod, - Name: name, - ParentName: parentName, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - Signature: c.extractMethodSignature(node, source, sourceLines), - } - - // Extract JSDoc comment - if c.options.IncludeDocComments { - chunk.DocComment = c.extractComment(node, source) - } - - return chunk -} - -// extractFunctionExpression extracts arrow functions and function expressions. -func (c *Chunker) extractFunctionExpression(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk { - // Try to find the variable name from parent - parent := node.Parent() - if parent == nil { - return nil - } - - var name string - if parent.Type() == "variable_declarator" { - name = c.findChildContent(parent, "identifier", source) - } else if parent.Type() == "assignment_expression" { - // Handle const foo = () => {} - for i := 0; i < int(parent.ChildCount()); i++ { - child := parent.Child(i) - if child.Type() == "identifier" || child.Type() == "member_expression" { - name = child.Content(source) - break - } - } - } - - if name == "" { - return nil // Anonymous function, skip - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageTypeScript, - Type: chunking.ChunkTypeFunction, - Name: name, - ParentName: parentName, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - } - - return chunk -} - -// extractClass extracts a class declaration. -func (c *Chunker) extractClass(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk { - name := c.findChildContent(node, "type_identifier", source) - if name == "" { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageTypeScript, - Type: chunking.ChunkTypeClass, - Name: name, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - Signature: c.extractClassSignature(node, source, sourceLines), - } - - // Extract JSDoc comment - if c.options.IncludeDocComments { - chunk.DocComment = c.extractComment(node, source) - } - - return chunk -} - -// extractInterface extracts an interface declaration. -func (c *Chunker) extractInterface(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk { - name := c.findChildContent(node, "type_identifier", source) - if name == "" { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageTypeScript, - Type: chunking.ChunkTypeInterface, - Name: name, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - } - - // Extract JSDoc comment - if c.options.IncludeDocComments { - chunk.DocComment = c.extractComment(node, source) - } - - return chunk -} - -// extractTypeAlias extracts a type alias declaration. -func (c *Chunker) extractTypeAlias(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk { - name := c.findChildContent(node, "type_identifier", source) - if name == "" { - return nil - } - - startLine := int(node.StartPoint().Row) + 1 - endLine := int(node.EndPoint().Row) + 1 - - chunk := &chunking.Chunk{ - FilePath: filePath, - Language: chunking.LanguageTypeScript, - Type: chunking.ChunkTypeType, - Name: name, - StartLine: startLine, - EndLine: endLine, - Content: c.extractLines(sourceLines, startLine, endLine), - } - - return chunk -} - -// findChildContent finds the first child of the given type and returns its content. -func (c *Chunker) findChildContent(node *sitter.Node, childType string, source []byte) string { - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == childType { - return child.Content(source) - } - } - return "" -} - -// extractFunctionSignature extracts the function signature. -func (c *Chunker) extractFunctionSignature(node *sitter.Node, source []byte, sourceLines []string) string { - startLine := int(node.StartPoint().Row) + 1 - - // Find the opening brace of the body - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "statement_block" { - endLine := int(child.StartPoint().Row) + 1 - return strings.TrimSpace(c.extractLines(sourceLines, startLine, endLine-1)) - } - } - - // Fallback: just return first line - return strings.TrimSpace(c.extractLines(sourceLines, startLine, startLine)) -} - -// extractMethodSignature extracts the method signature. -func (c *Chunker) extractMethodSignature(node *sitter.Node, source []byte, sourceLines []string) string { - startLine := int(node.StartPoint().Row) + 1 - - // Find the opening brace of the body - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "statement_block" { - endLine := int(child.StartPoint().Row) + 1 - return strings.TrimSpace(c.extractLines(sourceLines, startLine, endLine-1)) - } - } - - return strings.TrimSpace(c.extractLines(sourceLines, startLine, startLine)) -} - -// extractClassSignature extracts the class declaration line. -func (c *Chunker) extractClassSignature(node *sitter.Node, source []byte, sourceLines []string) string { - startLine := int(node.StartPoint().Row) + 1 - - // Find the opening brace of the class body - for i := 0; i < int(node.ChildCount()); i++ { - child := node.Child(i) - if child.Type() == "class_body" { - endLine := int(child.StartPoint().Row) + 1 - return strings.TrimSpace(c.extractLines(sourceLines, startLine, endLine-1)) - } - } - - return strings.TrimSpace(c.extractLines(sourceLines, startLine, startLine)) -} - -// extractComment extracts JSDoc or other comments from a node. -func (c *Chunker) extractComment(node *sitter.Node, source []byte) string { - // Check previous sibling for comment - prevSibling := node.PrevSibling() - if prevSibling != nil && prevSibling.Type() == "comment" { - comment := prevSibling.Content(source) - // Remove comment markers - comment = strings.TrimPrefix(comment, "/**") - comment = strings.TrimPrefix(comment, "/*") - comment = strings.TrimSuffix(comment, "*/") - comment = strings.TrimPrefix(comment, "//") - return strings.TrimSpace(comment) - } - - return "" -} - -// extractLines extracts a range of lines from source (1-indexed, inclusive). -func (c *Chunker) extractLines(lines []string, start, end int) string { - if start < 1 || end < start || start > len(lines) { - return "" - } - - startIdx := start - 1 - endIdx := end - if endIdx > len(lines) { - endIdx = len(lines) - } - - return strings.Join(lines[startIdx:endIdx], "\n") -} diff --git a/internal/chunking/typescript/chunker_test.go b/internal/chunking/typescript/chunker_test.go deleted file mode 100644 index 108f76a..0000000 --- a/internal/chunking/typescript/chunker_test.go +++ /dev/null @@ -1,398 +0,0 @@ -package typescript - -import ( - "context" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/lukaszraczylo/claude-mnemonic/internal/chunking" -) - -// ============================================================================= -// TEST HELPERS -// ============================================================================= - -func createTempTSFile(t *testing.T, content string, ext string) string { - t.Helper() - - tmpDir := t.TempDir() - filePath := filepath.Join(tmpDir, "test"+ext) - - err := os.WriteFile(filePath, []byte(content), 0600) - require.NoError(t, err) - - return filePath -} - -// ============================================================================= -// TESTS FOR Chunker -// ============================================================================= - -func TestNewChunker(t *testing.T) { - t.Parallel() - - opts := chunking.DefaultChunkOptions() - c := NewChunker(opts) - - assert.NotNil(t, c) - assert.NotNil(t, c.parser) -} - -func TestChunker_Language(t *testing.T) { - t.Parallel() - - c := NewChunker(chunking.DefaultChunkOptions()) - - assert.Equal(t, chunking.LanguageTypeScript, c.Language()) -} - -func TestChunker_SupportedExtensions(t *testing.T) { - t.Parallel() - - c := NewChunker(chunking.DefaultChunkOptions()) - exts := c.SupportedExtensions() - - assert.Contains(t, exts, ".ts") - assert.Contains(t, exts, ".tsx") - assert.Contains(t, exts, ".js") - assert.Contains(t, exts, ".jsx") -} - -func TestChunker_Chunk_SimpleFunction(t *testing.T) { - t.Parallel() - - code := `function greet(name: string): string { - return "Hello, " + name + "!"; -} -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find the greet function - var foundGreet bool - for _, chunk := range chunks { - if chunk.Name == "greet" { - foundGreet = true - assert.Equal(t, chunking.ChunkTypeFunction, chunk.Type) - assert.Equal(t, chunking.LanguageTypeScript, chunk.Language) - assert.Contains(t, chunk.Content, "function greet") - } - } - assert.True(t, foundGreet, "Should find 'greet' function") -} - -func TestChunker_Chunk_ClassWithMethods(t *testing.T) { - t.Parallel() - - code := `class Calculator { - add(a: number, b: number): number { - return a + b; - } - - multiply(a: number, b: number): number { - return a * b; - } -} -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find the Calculator class and its methods - var foundClass, foundAdd, foundMultiply bool - for _, chunk := range chunks { - switch chunk.Name { - case "Calculator": - foundClass = true - assert.Equal(t, chunking.ChunkTypeClass, chunk.Type) - case "add": - foundAdd = true - assert.Equal(t, chunking.ChunkTypeMethod, chunk.Type) - assert.Equal(t, "Calculator", chunk.ParentName) - case "multiply": - foundMultiply = true - assert.Equal(t, chunking.ChunkTypeMethod, chunk.Type) - assert.Equal(t, "Calculator", chunk.ParentName) - } - } - - assert.True(t, foundClass, "Should find 'Calculator' class") - assert.True(t, foundAdd, "Should find 'add' method") - assert.True(t, foundMultiply, "Should find 'multiply' method") -} - -func TestChunker_Chunk_Interface(t *testing.T) { - t.Parallel() - - code := `interface User { - id: number; - name: string; - email: string; -} - -interface Authenticator { - login(username: string, password: string): boolean; - logout(): void; -} -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find interfaces - interfaceNames := make(map[string]bool) - for _, chunk := range chunks { - if chunk.Type == chunking.ChunkTypeInterface { - interfaceNames[chunk.Name] = true - } - } - - assert.True(t, interfaceNames["User"]) - assert.True(t, interfaceNames["Authenticator"]) -} - -func TestChunker_Chunk_TypeAlias(t *testing.T) { - t.Parallel() - - code := `type UserID = string; - -type Handler = (event: Event) => void; - -type Result = { success: true; data: T } | { success: false; error: Error }; -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find type aliases - typeNames := make(map[string]bool) - for _, chunk := range chunks { - if chunk.Type == chunking.ChunkTypeType { - typeNames[chunk.Name] = true - } - } - - assert.True(t, typeNames["UserID"]) - assert.True(t, typeNames["Handler"]) - assert.True(t, typeNames["Result"]) -} - -func TestChunker_Chunk_ArrowFunction(t *testing.T) { - t.Parallel() - - code := `const add = (a: number, b: number): number => a + b; - -const greet = (name: string): string => { - return "Hello, " + name; -}; -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - _, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - // Arrow functions may or may not be captured depending on AST structure - // At minimum, no error should occur -} - -func TestChunker_Chunk_FileNotFound(t *testing.T) { - t.Parallel() - - c := NewChunker(chunking.DefaultChunkOptions()) - - _, err := c.Chunk(context.Background(), "/nonexistent/path/file.ts") - require.Error(t, err) - assert.Contains(t, err.Error(), "read file") -} - -func TestChunker_Chunk_EmptyFile(t *testing.T) { - t.Parallel() - - filePath := createTempTSFile(t, "", ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - assert.Empty(t, chunks) -} - -func TestChunker_Chunk_OnlyComments(t *testing.T) { - t.Parallel() - - code := `// This is a comment -/* Another comment */ -/** - * JSDoc comment - */ -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - // Comments without code should not produce chunks - assert.Empty(t, chunks) -} - -func TestChunker_Chunk_AsyncFunction(t *testing.T) { - t.Parallel() - - code := `async function fetchData(url: string): Promise { - const response = await fetch(url); - return response.json(); -} - -async function processItems(items: string[]): Promise { - for (const item of items) { - await process(item); - } -} -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find async functions - functionNames := make(map[string]bool) - for _, chunk := range chunks { - if chunk.Type == chunking.ChunkTypeFunction { - functionNames[chunk.Name] = true - } - } - - assert.True(t, functionNames["fetchData"]) - assert.True(t, functionNames["processItems"]) -} - -func TestChunker_Chunk_ExportedFunction(t *testing.T) { - t.Parallel() - - code := `export function publicFunction(): void { - console.log("public"); -} - -export default function defaultExport(): void { - console.log("default"); -} -` - - filePath := createTempTSFile(t, code, ".ts") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find exported functions - functionNames := make(map[string]bool) - for _, chunk := range chunks { - if chunk.Type == chunking.ChunkTypeFunction { - functionNames[chunk.Name] = true - } - } - - assert.True(t, functionNames["publicFunction"]) - assert.True(t, functionNames["defaultExport"]) -} - -func TestChunker_Chunk_JSXFile(t *testing.T) { - t.Parallel() - - code := `function Button({ label }: { label: string }) { - return ; -} - -function App() { - return ( -
-
- ); -} -` - - filePath := createTempTSFile(t, code, ".tsx") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find JSX components as functions - functionNames := make(map[string]bool) - for _, chunk := range chunks { - if chunk.Type == chunking.ChunkTypeFunction { - functionNames[chunk.Name] = true - } - } - - assert.True(t, functionNames["Button"]) - assert.True(t, functionNames["App"]) -} - -func TestChunker_Chunk_JavaScript(t *testing.T) { - t.Parallel() - - code := `function simpleFunc() { - return 42; -} - -class MyClass { - constructor() { - this.value = 0; - } - - getValue() { - return this.value; - } -} -` - - filePath := createTempTSFile(t, code, ".js") - c := NewChunker(chunking.DefaultChunkOptions()) - - chunks, err := c.Chunk(context.Background(), filePath) - require.NoError(t, err) - require.NotEmpty(t, chunks) - - // Should find JavaScript functions and classes - var foundFunc, foundClass bool - for _, chunk := range chunks { - if chunk.Name == "simpleFunc" { - foundFunc = true - } - if chunk.Name == "MyClass" { - foundClass = true - } - } - - assert.True(t, foundFunc, "Should find 'simpleFunc' function") - assert.True(t, foundClass, "Should find 'MyClass' class") -}