refactor: remove dead internal/chunking package

The chunking Manager had zero production callers (only its own tests). Removed the package and ran go mod tidy, dropping the now-orphaned go-tree-sitter dependency.
This commit is contained in:
2026-06-19 14:01:42 +01:00
parent 11ee9a4f1f
commit af19b6a798
12 changed files with 0 additions and 2513 deletions
-1
View File
@@ -13,7 +13,6 @@ require (
github.com/lukaszraczylo/oss-telemetry v0.2.3
github.com/mattn/go-sqlite3 v1.14.46
github.com/rs/zerolog v1.35.1
github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82
github.com/stretchr/testify v1.11.1
github.com/sugarme/tokenizer v0.3.0
github.com/yalue/onnxruntime_go v1.31.0
-2
View File
@@ -39,8 +39,6 @@ github.com/rs/zerolog v1.35.1 h1:m7xQeoiLIiV0BCEY4Hs+j2NG4Gp2o2KPKmhnnLiazKI=
github.com/rs/zerolog v1.35.1/go.mod h1:EjML9kdfa/RMA7h/6z6pYmq1ykOuA8/mjWaEvGI+jcw=
github.com/schollz/progressbar/v2 v2.15.0 h1:dVzHQ8fHRmtPjD3K10jT3Qgn/+H+92jhPrhmxIJfDz8=
github.com/schollz/progressbar/v2 v2.15.0/go.mod h1:UdPq3prGkfQ7MOzZKlDRpYKcFqEMczbD7YmbPgpzKMI=
github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4=
github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-285
View File
@@ -1,285 +0,0 @@
// Package golang provides AST-aware chunking for Go source files.
package golang
import (
"context"
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"strings"
"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
)
// Chunker implements AST-aware chunking for Go files.
// It holds only the chunking options consulted by the extract helpers.
type Chunker struct {
// options carries the filters (IncludePrivate, IncludeDocComments) read during extraction.
options chunking.ChunkOptions
}
// NewChunker builds a Go chunker that applies the given options to every
// file it is asked to chunk.
func NewChunker(options chunking.ChunkOptions) *Chunker {
	c := new(Chunker)
	c.options = options
	return c
}
// Language returns the language this chunker supports.
// It always reports chunking.LanguageGo.
func (c *Chunker) Language() chunking.Language {
return chunking.LanguageGo
}
// SupportedExtensions lists the file extensions (with leading dot) that this
// chunker handles. A fresh slice is returned on every call.
func (c *Chunker) SupportedExtensions() []string {
	exts := make([]string, 0, 1)
	exts = append(exts, ".go")
	return exts
}
// Chunk reads the Go file at filePath, parses it (comments included) and
// returns the chunks produced for its top-level declarations, in source
// order. Function declarations go through extractFunction, all other general
// declarations through extractGenDecl.
//
// The returned slice is non-nil even when no chunk was produced. A read or
// parse failure is returned wrapped; ctx is accepted for interface
// compatibility and is not consulted.
func (c *Chunker) Chunk(ctx context.Context, filePath string) ([]chunking.Chunk, error) {
	src, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("read file: %w", err)
	}

	fileSet := token.NewFileSet()
	parsed, err := parser.ParseFile(fileSet, filePath, src, parser.ParseComments)
	if err != nil {
		return nil, fmt.Errorf("parse Go file: %w", err)
	}

	lines := strings.Split(string(src), "\n")
	result := []chunking.Chunk{}
	for _, decl := range parsed.Decls {
		if fn, ok := decl.(*ast.FuncDecl); ok {
			if fc := c.extractFunction(fileSet, fn, lines, filePath); fc != nil {
				result = append(result, *fc)
			}
			continue
		}
		if gen, ok := decl.(*ast.GenDecl); ok {
			result = append(result, c.extractGenDecl(fileSet, gen, lines, filePath)...)
		}
	}
	return result, nil
}
// extractFunction turns a function or method declaration into a chunk.
//
// It returns nil when the name is unexported and IncludePrivate is off.
// Declarations with a receiver are reported as methods with ParentName set
// to the receiver type; everything else is a plain function. The line range
// starts at the func keyword, so a preceding doc comment is not part of
// Content and is only surfaced through DocComment when IncludeDocComments
// is on.
func (c *Chunker) extractFunction(fset *token.FileSet, fn *ast.FuncDecl, sourceLines []string, filePath string) *chunking.Chunk {
	if !fn.Name.IsExported() && !c.options.IncludePrivate {
		return nil
	}

	first := fset.Position(fn.Pos()).Line
	last := fset.Position(fn.End()).Line

	out := &chunking.Chunk{
		FilePath:  filePath,
		Language:  chunking.LanguageGo,
		Type:      chunking.ChunkTypeFunction,
		Name:      fn.Name.Name,
		StartLine: first,
		EndLine:   last,
		Content:   c.extractLines(sourceLines, first, last),
		Signature: c.extractFunctionSignature(fn, fset, sourceLines),
	}

	// A non-empty receiver list is what distinguishes a method.
	if fn.Recv != nil && len(fn.Recv.List) > 0 {
		out.Type = chunking.ChunkTypeMethod
		out.ParentName = c.extractReceiverType(fn.Recv)
	}

	if fn.Doc != nil && c.options.IncludeDocComments {
		out.DocComment = strings.TrimSpace(fn.Doc.Text())
	}
	return out
}
// extractGenDecl produces chunks for the specs of a general declaration.
// Type specs go to extractTypeSpec and const/var specs to extractValueSpec;
// any other spec kind (such as imports) is ignored. The result is nil when
// no spec yields a chunk.
func (c *Chunker) extractGenDecl(fset *token.FileSet, gd *ast.GenDecl, sourceLines []string, filePath string) []chunking.Chunk {
	var out []chunking.Chunk
	for _, spec := range gd.Specs {
		var extracted *chunking.Chunk
		if ts, ok := spec.(*ast.TypeSpec); ok {
			extracted = c.extractTypeSpec(fset, gd, ts, sourceLines, filePath)
		} else if vs, ok := spec.(*ast.ValueSpec); ok {
			extracted = c.extractValueSpec(fset, gd, vs, sourceLines, filePath)
		}
		if extracted != nil {
			out = append(out, *extracted)
		}
	}
	return out
}
// extractTypeSpec extracts a type declaration (struct, interface or any
// other named type) as a chunk.
//
// It returns nil when the type name is unexported and IncludePrivate is off.
//
// Span: a stand-alone `type T ...` declaration is reported from the `type`
// keyword to the end of the declaration. Inside a parenthesized
// `type ( ... )` group each spec is reported on its own span; using the
// group's span there would give every spec the same line range and the same
// content (the entire group).
//
// Doc comment: the spec's own doc comment is preferred; the declaration's
// doc comment is used when the spec has none (which is always the case for a
// stand-alone declaration).
func (c *Chunker) extractTypeSpec(fset *token.FileSet, gd *ast.GenDecl, ts *ast.TypeSpec, sourceLines []string, filePath string) *chunking.Chunk {
	// Skip unexported if configured
	if !c.options.IncludePrivate && !ts.Name.IsExported() {
		return nil
	}

	from, to := gd.Pos(), gd.End()
	if gd.Lparen.IsValid() {
		// Grouped declaration: narrow to this spec only.
		from, to = ts.Pos(), ts.End()
	}
	startLine := fset.Position(from).Line
	endLine := fset.Position(to).Line

	chunk := &chunking.Chunk{
		FilePath:  filePath,
		Language:  chunking.LanguageGo,
		Name:      ts.Name.Name,
		StartLine: startLine,
		EndLine:   endLine,
		Content:   c.extractLines(sourceLines, startLine, endLine),
	}

	// Determine chunk type based on type expression
	switch ts.Type.(type) {
	case *ast.StructType:
		chunk.Type = chunking.ChunkTypeClass // Treat struct as class
	case *ast.InterfaceType:
		chunk.Type = chunking.ChunkTypeInterface
	default:
		chunk.Type = chunking.ChunkTypeType
	}

	if c.options.IncludeDocComments {
		doc := ts.Doc
		if doc == nil {
			doc = gd.Doc
		}
		if doc != nil {
			chunk.DocComment = strings.TrimSpace(doc.Text())
		}
	}
	return chunk
}
// extractValueSpec turns one const or var spec into a chunk.
//
// With IncludePrivate off, the spec is dropped (nil) unless at least one of
// its names is exported. The chunk is named after all declared names joined
// with ", ". Line range, content and doc comment are taken from the
// enclosing declaration gd, so specs of a parenthesized group all carry the
// group's span.
func (c *Chunker) extractValueSpec(fset *token.FileSet, gd *ast.GenDecl, vs *ast.ValueSpec, sourceLines []string, filePath string) *chunking.Chunk {
	if !c.options.IncludePrivate {
		exported := false
		for _, ident := range vs.Names {
			if ident.IsExported() {
				exported = true
				break
			}
		}
		if !exported {
			return nil
		}
	}

	first := fset.Position(gd.Pos()).Line
	last := fset.Position(gd.End()).Line

	idents := make([]string, 0, len(vs.Names))
	for _, ident := range vs.Names {
		idents = append(idents, ident.Name)
	}

	out := &chunking.Chunk{
		FilePath:  filePath,
		Language:  chunking.LanguageGo,
		Type:      chunking.ChunkTypeVar,
		Name:      strings.Join(idents, ", "),
		StartLine: first,
		EndLine:   last,
		Content:   c.extractLines(sourceLines, first, last),
	}
	// Anything that is not a const declaration is reported as a var.
	if gd.Tok == token.CONST {
		out.Type = chunking.ChunkTypeConst
	}

	if gd.Doc != nil && c.options.IncludeDocComments {
		out.DocComment = strings.TrimSpace(gd.Doc.Text())
	}
	return out
}
// extractReceiverType returns the bare type name of a method receiver.
//
// Pointer, parenthesized and generic receivers are unwrapped, so `T`, `*T`,
// `(*T)`, `*T[K]` and `T[K, V]` all yield "T". It returns "" when recv is
// nil or empty, or when the receiver expression has any other shape.
func (c *Chunker) extractReceiverType(recv *ast.FieldList) string {
	if recv == nil || len(recv.List) == 0 {
		return ""
	}

	expr := recv.List[0].Type
	for {
		switch t := expr.(type) {
		case *ast.Ident:
			return t.Name
		case *ast.StarExpr:
			expr = t.X
		case *ast.ParenExpr:
			expr = t.X
		case *ast.IndexExpr:
			// Generic receiver with a single type parameter: T[K].
			expr = t.X
		case *ast.IndexListExpr:
			// Generic receiver with several type parameters: T[K, V].
			expr = t.X
		default:
			return ""
		}
	}
}
// extractFunctionSignature returns the declaration text of fn without its
// body, with surrounding whitespace trimmed.
//
// For a declaration without a body the full declaration lines are returned
// untrimmed. Otherwise the text runs from the start of the declaration up to
// (not including) the opening brace of the body. The cut is made at the
// body's recorded position rather than at the first "{" in the text, because
// braces also appear inside parameter and result types (interface{},
// struct{}, map[string]struct{}).
//
// sourceLines must be the "\n"-split content fn was parsed from. If the
// recorded positions do not fit sourceLines, "" is returned.
func (c *Chunker) extractFunctionSignature(fn *ast.FuncDecl, fset *token.FileSet, sourceLines []string) string {
	startPos := fset.Position(fn.Pos())
	if fn.Body == nil {
		// No body, return entire declaration
		return c.extractLines(sourceLines, startPos.Line, fset.Position(fn.End()).Line)
	}

	bodyPos := fset.Position(fn.Body.Pos())
	if startPos.Line < 1 || bodyPos.Line < startPos.Line || bodyPos.Line > len(sourceLines) {
		return ""
	}

	// Column is a 1-based byte offset, so Column-1 indexes the brace itself.
	braceLine := sourceLines[bodyPos.Line-1]
	cut := bodyPos.Column - 1
	if cut < 0 || cut > len(braceLine) {
		cut = len(braceLine)
	}

	if startPos.Line == bodyPos.Line {
		from := startPos.Column - 1
		if from < 0 || from > cut {
			from = 0
		}
		return strings.TrimSpace(braceLine[from:cut])
	}

	// Multi-line signature: whole lines before the brace line, then the brace
	// line up to the brace.
	parts := make([]string, 0, bodyPos.Line-startPos.Line+1)
	parts = append(parts, sourceLines[startPos.Line-1:bodyPos.Line-1]...)
	parts = append(parts, braceLine[:cut])
	return strings.TrimSpace(strings.Join(parts, "\n"))
}
// extractLines joins lines start..end (1-indexed, both inclusive) with "\n".
// An end beyond the last line is clamped to it; a start below 1, a start
// past the last line, or an end before start yields "".
func (c *Chunker) extractLines(lines []string, start, end int) string {
	total := len(lines)
	switch {
	case start < 1, end < start, start > total:
		return ""
	}

	// end is inclusive and 1-indexed, so it doubles as the exclusive
	// 0-indexed upper bound.
	upper := end
	if upper > total {
		upper = total
	}
	return strings.Join(lines[start-1:upper], "\n")
}
-214
View File
@@ -1,214 +0,0 @@
package golang
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
)
// TestGoChunker_BasicFunctions writes a small Go file into a temp dir, chunks
// it with the default options and checks that all three top-level functions
// (two exported, one unexported) come back as function chunks with the Go
// language tag and non-empty content and signature.
// NOTE(review): expecting the unexported helper relies on
// DefaultChunkOptions() enabling IncludePrivate — confirm in the chunking package.
func TestGoChunker_BasicFunctions(t *testing.T) {
// Create temp test file
tmpDir := t.TempDir()
testFile := filepath.Join(tmpDir, "test.go")
testCode := `package main
import "fmt"
// Greet prints a greeting message
func Greet(name string) {
fmt.Printf("Hello, %s!\n", name)
}
// Add adds two numbers
func Add(a, b int) int {
return a + b
}
// unexported function should be included by default
func helper() string {
return "helper"
}
`
if err := os.WriteFile(testFile, []byte(testCode), 0600); err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
// Create chunker with default options
chunker := NewChunker(chunking.DefaultChunkOptions())
// Chunk the file
chunks, err := chunker.Chunk(context.Background(), testFile)
if err != nil {
t.Fatalf("Chunk() failed: %v", err)
}
// Verify we got all functions
if len(chunks) != 3 {
t.Errorf("Expected 3 chunks (Greet, Add, helper), got %d", len(chunks))
}
// Verify chunk details
// Each expected name starts as false and is flipped to true once seen.
expectedNames := map[string]bool{
"Greet": false,
"Add": false,
"helper": false,
}
for _, chunk := range chunks {
if chunk.Type != chunking.ChunkTypeFunction {
t.Errorf("Expected chunk type 'function', got '%s'", chunk.Type)
}
if chunk.Language != chunking.LanguageGo {
t.Errorf("Expected language 'go', got '%s'", chunk.Language)
}
if _, ok := expectedNames[chunk.Name]; !ok {
t.Errorf("Unexpected function name: %s", chunk.Name)
} else {
expectedNames[chunk.Name] = true
}
// Verify content is non-empty
if chunk.Content == "" {
t.Errorf("Chunk %s has empty content", chunk.Name)
}
// Verify signature is present for functions
if chunk.Signature == "" {
t.Errorf("Chunk %s has empty signature", chunk.Name)
}
}
// Verify all expected functions were found
for name, found := range expectedNames {
if !found {
t.Errorf("Expected function %s not found", name)
}
}
}
// TestGoChunker_StructsAndMethods chunks a file holding one struct and two
// pointer-receiver methods and checks that the struct is reported as
// ChunkTypeClass and both methods as ChunkTypeMethod with ParentName "User".
func TestGoChunker_StructsAndMethods(t *testing.T) {
tmpDir := t.TempDir()
testFile := filepath.Join(tmpDir, "test.go")
testCode := `package main
// User represents a user
type User struct {
ID int
Name string
}
// GetName returns the user's name
func (u *User) GetName() string {
return u.Name
}
// SetName sets the user's name
func (u *User) SetName(name string) {
u.Name = name
}
`
if err := os.WriteFile(testFile, []byte(testCode), 0600); err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
chunker := NewChunker(chunking.DefaultChunkOptions())
chunks, err := chunker.Chunk(context.Background(), testFile)
if err != nil {
t.Fatalf("Chunk() failed: %v", err)
}
// Should have 1 struct + 2 methods = 3 chunks
if len(chunks) != 3 {
t.Errorf("Expected 3 chunks (User struct, GetName, SetName), got %d", len(chunks))
}
// Find the struct and methods
// Pointers into the slice are taken by index so they refer to the stored
// elements rather than to a loop copy.
var structChunk, getNameChunk, setNameChunk *chunking.Chunk
for i := range chunks {
switch chunks[i].Name {
case "User":
structChunk = &chunks[i]
case "GetName":
getNameChunk = &chunks[i]
case "SetName":
setNameChunk = &chunks[i]
}
}
// Verify struct
// Fatal (not Error) on a missing chunk: the field checks below would
// dereference a nil pointer otherwise.
if structChunk == nil {
t.Fatal("User struct not found")
}
if structChunk.Type != chunking.ChunkTypeClass {
t.Errorf("Expected User to be ChunkTypeClass, got %s", structChunk.Type)
}
// Verify methods
if getNameChunk == nil {
t.Fatal("GetName method not found")
}
if getNameChunk.Type != chunking.ChunkTypeMethod {
t.Errorf("Expected GetName to be ChunkTypeMethod, got %s", getNameChunk.Type)
}
if getNameChunk.ParentName != "User" {
t.Errorf("Expected GetName parent to be 'User', got '%s'", getNameChunk.ParentName)
}
if setNameChunk == nil {
t.Fatal("SetName method not found")
}
if setNameChunk.Type != chunking.ChunkTypeMethod {
t.Errorf("Expected SetName to be ChunkTypeMethod, got %s", setNameChunk.Type)
}
if setNameChunk.ParentName != "User" {
t.Errorf("Expected SetName parent to be 'User', got '%s'", setNameChunk.ParentName)
}
}
// TestGoChunker_DocComments chunks a single documented function and checks
// that its two-line doc comment is returned verbatim, without comment markers
// and without surrounding whitespace.
// NOTE(review): relies on DefaultChunkOptions() enabling IncludeDocComments —
// confirm in the chunking package.
func TestGoChunker_DocComments(t *testing.T) {
tmpDir := t.TempDir()
testFile := filepath.Join(tmpDir, "test.go")
testCode := `package main
// Calculate performs a calculation.
// It takes two integers and returns their sum.
func Calculate(a, b int) int {
return a + b
}
`
if err := os.WriteFile(testFile, []byte(testCode), 0600); err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
chunker := NewChunker(chunking.DefaultChunkOptions())
chunks, err := chunker.Chunk(context.Background(), testFile)
if err != nil {
t.Fatalf("Chunk() failed: %v", err)
}
// Fatalf here: chunks[0] below would panic on an empty result.
if len(chunks) != 1 {
t.Fatalf("Expected 1 chunk, got %d", len(chunks))
}
chunk := chunks[0]
if chunk.DocComment == "" {
t.Error("Expected doc comment to be present")
}
// Doc comment should contain the comment text
expectedComment := "Calculate performs a calculation.\nIt takes two integers and returns their sum."
if chunk.DocComment != expectedComment {
t.Errorf("Expected doc comment '%s', got '%s'", expectedComment, chunk.DocComment)
}
}
-106
View File
@@ -1,106 +0,0 @@
package chunking
import (
	"context"
	"fmt"
	"path/filepath"
	"sort"
	"strings"
)
// Manager dispatches files to appropriate language-specific chunkers.
// Dispatch is keyed on the file extension; the options are applied as a
// post-filter on whatever the selected chunker returns.
type Manager struct {
chunkers map[string]Chunker // extension -> chunker
// options supplies the MinLines and MaxChunkSize limits used by ChunkFile.
options ChunkOptions
}
// NewManager creates a chunking manager that dispatches to the given
// chunkers and filters their output according to options.
//
// Every chunker is registered under each extension it reports. Extensions
// are lower-cased on registration because ChunkFile and SupportsFile
// lower-case the file's extension before the lookup; without that, a chunker
// reporting ".PY" could never be selected. When two chunkers report the same
// extension, the one later in the slice wins.
func NewManager(chunkers []Chunker, options ChunkOptions) *Manager {
	m := &Manager{
		chunkers: make(map[string]Chunker),
		options:  options,
	}

	for _, chunker := range chunkers {
		for _, ext := range chunker.SupportedExtensions() {
			m.chunkers[strings.ToLower(ext)] = chunker
		}
	}
	return m
}
// ChunkFile chunks one file with the chunker registered for its lower-cased
// extension and returns the chunks that pass the manager's size limits.
//
// A chunk is dropped when MinLines is positive and the chunk spans fewer
// lines, or when MaxChunkSize is positive and the chunk's content is longer
// than that many bytes (oversized chunks are skipped, not split).
//
// An error is returned when no chunker is registered for the extension or
// when the chunker itself fails; the latter is wrapped with the file path.
func (m *Manager) ChunkFile(ctx context.Context, filePath string) ([]Chunk, error) {
	ext := strings.ToLower(filepath.Ext(filePath))
	chunker, found := m.chunkers[ext]
	if !found {
		return nil, fmt.Errorf("no chunker for extension %s", ext)
	}

	raw, err := chunker.Chunk(ctx, filePath)
	if err != nil {
		return nil, fmt.Errorf("chunk %s: %w", filePath, err)
	}

	minLines, maxSize := m.options.MinLines, m.options.MaxChunkSize
	kept := make([]Chunk, 0, len(raw))
	for _, ch := range raw {
		tooShort := minLines > 0 && ch.EndLine-ch.StartLine+1 < minLines
		tooBig := maxSize > 0 && len(ch.Content) > maxSize
		if tooShort || tooBig {
			continue
		}
		kept = append(kept, ch)
	}
	return kept, nil
}
// ChunkFiles chunks the given files one after another, in the order given.
//
// It returns a map from file path to that file's chunks, plus one error per
// file that could not be chunked (each prefixed with the file path). A
// failing file does not stop processing of the remaining files. Files that
// produce no chunks are left out of the map.
func (m *Manager) ChunkFiles(ctx context.Context, filePaths []string) (map[string][]Chunk, []error) {
	results := make(map[string][]Chunk)
	// Named errs rather than errors so the stdlib package name is not shadowed.
	var errs []error

	for _, filePath := range filePaths {
		chunks, err := m.ChunkFile(ctx, filePath)
		if err != nil {
			errs = append(errs, fmt.Errorf("%s: %w", filePath, err))
			continue
		}
		if len(chunks) > 0 {
			results[filePath] = chunks
		}
	}
	return results, errs
}
// SupportsFile reports whether a chunker is registered for the lower-cased
// extension of filePath. The file itself is not accessed.
func (m *Manager) SupportsFile(filePath string) bool {
	if _, registered := m.chunkers[strings.ToLower(filepath.Ext(filePath))]; registered {
		return true
	}
	return false
}
// SupportedExtensions returns every file extension that has a registered
// chunker, in ascending lexical order. The order is fixed because map
// iteration order is randomized, which would otherwise make the result
// differ from call to call.
func (m *Manager) SupportedExtensions() []string {
	exts := make([]string, 0, len(m.chunkers))
	for ext := range m.chunkers {
		exts = append(exts, ext)
	}
	sort.Strings(exts)
	return exts
}
-162
View File
@@ -1,162 +0,0 @@
package chunking
import (
"context"
"os"
"path/filepath"
"testing"
)
// mockChunker is a test double that claims .go, .py and .ts files and
// answers every request with the same fixed chunk.
type mockChunker struct{}

// Chunk never touches the file on disk; it reports a single one-line
// function chunk named "TestFunc" for whatever path it is given.
func (m *mockChunker) Chunk(ctx context.Context, filePath string) ([]Chunk, error) {
	dummy := Chunk{
		FilePath:  filePath,
		Language:  LanguageGo,
		Type:      ChunkTypeFunction,
		Name:      "TestFunc",
		StartLine: 1,
		EndLine:   1,
		Content:   "test",
	}
	return []Chunk{dummy}, nil
}

// Language reports LanguageGo regardless of the extension being served.
func (m *mockChunker) Language() Language {
	return LanguageGo
}

// SupportedExtensions lists the three extensions the manager tests use.
func (m *mockChunker) SupportedExtensions() []string {
	exts := []string{".go"}
	return append(exts, ".py", ".ts")
}
// TestManager_ChunkMultipleFiles registers a single mockChunker for .go, .py
// and .ts, then checks extension support and that ChunkFiles returns chunks
// for all three files without errors.
// The mock never opens the files, so the fixture contents below are not
// inspected by the code under test.
func TestManager_ChunkMultipleFiles(t *testing.T) {
tmpDir := t.TempDir()
// Create a Go file
goFile := filepath.Join(tmpDir, "test.go")
goCode := `package main
func Hello() string {
return "hello"
}
`
if err := os.WriteFile(goFile, []byte(goCode), 0600); err != nil {
t.Fatalf("Failed to create Go file: %v", err)
}
// Create a Python file
pyFile := filepath.Join(tmpDir, "test.py")
pyCode := `def greet(name):
return f"Hello, {name}!"
class User:
def __init__(self, name):
self.name = name
`
if err := os.WriteFile(pyFile, []byte(pyCode), 0600); err != nil {
t.Fatalf("Failed to create Python file: %v", err)
}
// Create a TypeScript file
tsFile := filepath.Join(tmpDir, "test.ts")
tsCode := `function add(a: number, b: number): number {
return a + b;
}
class Calculator {
multiply(a: number, b: number): number {
return a * b;
}
}
`
if err := os.WriteFile(tsFile, []byte(tsCode), 0600); err != nil {
t.Fatalf("Failed to create TypeScript file: %v", err)
}
// Create manager
manager := NewManager([]Chunker{&mockChunker{}}, DefaultChunkOptions())
// Test SupportsFile
if !manager.SupportsFile(goFile) {
t.Error("Manager should support .go files")
}
if !manager.SupportsFile(pyFile) {
t.Error("Manager should support .py files")
}
if !manager.SupportsFile(tsFile) {
t.Error("Manager should support .ts files")
}
// This path is never created; SupportsFile only looks at the extension.
unsupportedFile := filepath.Join(tmpDir, "test.txt")
if manager.SupportsFile(unsupportedFile) {
t.Error("Manager should not support .txt files")
}
// Test ChunkFiles
results, errs := manager.ChunkFiles(context.Background(), []string{goFile, pyFile, tsFile})
if len(errs) > 0 {
t.Errorf("ChunkFiles returned errors: %v", errs)
}
if len(results) != 3 {
t.Errorf("Expected results for 3 files, got %d", len(results))
}
// Verify each file has chunks
for _, file := range []string{goFile, pyFile, tsFile} {
if chunks, ok := results[file]; !ok || len(chunks) == 0 {
t.Errorf("No chunks found for file %s", file)
}
}
}
// mockChunkerWithExts is a test chunker with configurable extensions
type mockChunkerWithExts struct {
// exts is returned as-is by SupportedExtensions.
exts []string
}
// Chunk produces no chunks and no error for any input.
func (m *mockChunkerWithExts) Chunk(ctx context.Context, filePath string) ([]Chunk, error) {
return nil, nil
}
// Language always reports LanguageGo, whatever extensions are configured.
func (m *mockChunkerWithExts) Language() Language {
return LanguageGo
}
// SupportedExtensions returns the configured slice itself, not a copy.
func (m *mockChunkerWithExts) SupportedExtensions() []string {
return m.exts
}
func TestManager_SupportedExtensions(t *testing.T) {
// Create manager with mock chunkers
manager := NewManager([]Chunker{
&mockChunkerWithExts{exts: []string{".go"}},
&mockChunkerWithExts{exts: []string{".py", ".pyw"}},
}, DefaultChunkOptions())
exts := manager.SupportedExtensions()
expectedExts := map[string]bool{
".go": false,
".py": false,
".pyw": false,
}
for _, ext := range exts {
if _, ok := expectedExts[ext]; ok {
expectedExts[ext] = true
} else {
t.Errorf("Unexpected extension: %s", ext)
}
}
for ext, found := range expectedExts {
if !found {
t.Errorf("Expected extension %s not found", ext)
}
}
}
-291
View File
@@ -1,291 +0,0 @@
// Package python provides AST-aware chunking for Python source files using tree-sitter.
package python
import (
"context"
"fmt"
"os"
"strings"
sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/python"
"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
)
// Chunker implements AST-aware chunking for Python files.
type Chunker struct {
// parser is the tree-sitter parser created in NewChunker and reused by every Chunk call.
// NOTE(review): one parser instance is shared across calls — confirm whether
// concurrent Chunk calls on the same Chunker are safe.
parser *sitter.Parser
// options carries the filters (IncludePrivate, IncludeDocComments) read during extraction.
options chunking.ChunkOptions
}
// NewChunker builds a Python chunker: it creates a tree-sitter parser,
// configures it with the Python grammar and stores the given options.
func NewChunker(options chunking.ChunkOptions) *Chunker {
	c := &Chunker{
		options: options,
		parser:  sitter.NewParser(),
	}
	c.parser.SetLanguage(python.GetLanguage())
	return c
}
// Language returns the language this chunker supports.
// It always reports chunking.LanguagePython.
func (c *Chunker) Language() chunking.Language {
return chunking.LanguagePython
}
// SupportedExtensions lists the file extensions (with leading dot) that this
// chunker handles. A fresh slice is returned on every call.
func (c *Chunker) SupportedExtensions() []string {
	var exts []string
	exts = append(exts, ".py")
	return exts
}
// Chunk reads the Python file at filePath, parses it with tree-sitter and
// returns the chunks collected by walking the syntax tree from its root.
//
// The returned slice is non-nil even when nothing was found. A read or parse
// failure is returned wrapped. ctx is handed to the parser; the syntax tree
// is closed before returning.
func (c *Chunker) Chunk(ctx context.Context, filePath string) ([]chunking.Chunk, error) {
	src, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return nil, fmt.Errorf("read file: %w", readErr)
	}

	tree, parseErr := c.parser.ParseCtx(ctx, nil, src)
	if parseErr != nil {
		return nil, fmt.Errorf("parse Python file: %w", parseErr)
	}
	defer tree.Close()

	found := []chunking.Chunk{}
	lines := strings.Split(string(src), "\n")
	c.walkNode(tree.RootNode(), src, lines, filePath, "", &found)
	return found, nil
}
// walkNode visits node and its descendants depth-first and appends every
// function and class chunk it can extract to *chunks.
//
// parentName is the name of the enclosing class ("" at module level) and is
// what makes a function come out as a method.
//
//   - class_definition: the class chunk is recorded and only its "block"
//     children are walked, with the class name as the new parent. If the
//     class is rejected by extractClass, its whole subtree is skipped.
//   - function_definition: the function chunk is recorded (when accepted),
//     then its children are walked with the parent name unchanged, so
//     nested definitions inherit the same parent.
//   - anything else: children are walked with the parent name unchanged.
func (c *Chunker) walkNode(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string, chunks *[]chunking.Chunk) {
	kind := node.Type()
	childCount := int(node.ChildCount())

	if kind == "class_definition" {
		cls := c.extractClass(node, source, sourceLines, filePath)
		if cls == nil {
			return
		}
		*chunks = append(*chunks, *cls)
		for i := 0; i < childCount; i++ {
			if body := node.Child(i); body.Type() == "block" {
				c.walkNode(body, source, sourceLines, filePath, cls.Name, chunks)
			}
		}
		return
	}

	if kind == "function_definition" {
		if fn := c.extractFunction(node, source, sourceLines, filePath, parentName); fn != nil {
			*chunks = append(*chunks, *fn)
		}
	}

	for i := 0; i < childCount; i++ {
		c.walkNode(node.Child(i), source, sourceLines, filePath, parentName, chunks)
	}
}
// extractFunction turns a function_definition node into a chunk.
//
// The name is the node's first "identifier" child; nil is returned when
// there is none. With IncludePrivate off, names that start with exactly one
// underscore are skipped (names starting with two underscores are kept).
// A non-empty parentName makes the chunk a method of that parent. Lines are
// converted from tree-sitter's 0-based rows to 1-based line numbers.
func (c *Chunker) extractFunction(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk {
	name, hasName := "", false
	for i, n := 0, int(node.ChildCount()); i < n; i++ {
		if child := node.Child(i); child.Type() == "identifier" {
			name, hasName = child.Content(source), true
			break
		}
	}
	if !hasName {
		return nil
	}

	singleUnderscore := strings.HasPrefix(name, "_") && !strings.HasPrefix(name, "__")
	if singleUnderscore && !c.options.IncludePrivate {
		return nil
	}

	first := int(node.StartPoint().Row) + 1
	last := int(node.EndPoint().Row) + 1

	fn := &chunking.Chunk{
		FilePath:   filePath,
		Language:   chunking.LanguagePython,
		Type:       chunking.ChunkTypeFunction,
		Name:       name,
		ParentName: parentName,
		StartLine:  first,
		EndLine:    last,
		Content:    c.extractLines(sourceLines, first, last),
		Signature:  c.extractFunctionSignature(node, source, sourceLines),
	}
	if parentName != "" {
		fn.Type = chunking.ChunkTypeMethod
	}
	if c.options.IncludeDocComments {
		fn.DocComment = c.extractDocstring(node, source)
	}
	return fn
}
// extractClass turns a class_definition node into a class chunk.
//
// The name is the node's first "identifier" child; nil is returned when
// there is none. With IncludePrivate off, names that start with exactly one
// underscore are skipped. The chunk spans the whole class including its
// body; the class's docstring becomes DocComment when IncludeDocComments
// is on.
func (c *Chunker) extractClass(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk {
	var ident *sitter.Node
	total := int(node.ChildCount())
	for i := 0; i < total && ident == nil; i++ {
		if candidate := node.Child(i); candidate.Type() == "identifier" {
			ident = candidate
		}
	}
	if ident == nil {
		return nil
	}

	className := ident.Content(source)
	if !c.options.IncludePrivate {
		if strings.HasPrefix(className, "_") && !strings.HasPrefix(className, "__") {
			return nil
		}
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	from := int(node.StartPoint().Row) + 1
	to := int(node.EndPoint().Row) + 1

	cls := &chunking.Chunk{
		FilePath:  filePath,
		Language:  chunking.LanguagePython,
		Type:      chunking.ChunkTypeClass,
		Name:      className,
		StartLine: from,
		EndLine:   to,
		Content:   c.extractLines(sourceLines, from, to),
		Signature: c.extractClassSignature(node, source, sourceLines),
	}
	if c.options.IncludeDocComments {
		cls.DocComment = c.extractDocstring(node, source)
	}
	return cls
}
// extractFunctionSignature returns the header lines of a definition: every
// full source line from the node's first line through the line holding its
// ":" child, trimmed of surrounding whitespace. If the node has no ":"
// child, only the first line is returned. source is unused.
func (c *Chunker) extractFunctionSignature(node *sitter.Node, source []byte, sourceLines []string) string {
	first := int(node.StartPoint().Row) + 1

	// Default to the first line; widen to the colon's line when one exists.
	last := first
	for i, n := 0, int(node.ChildCount()); i < n; i++ {
		if tok := node.Child(i); tok.Type() == ":" {
			last = int(tok.EndPoint().Row) + 1
			break
		}
	}
	return strings.TrimSpace(c.extractLines(sourceLines, first, last))
}
// extractClassSignature returns the header lines of a class definition:
// every full source line from the node's first line through the line holding
// its ":" child, trimmed, or just the first line when there is no ":" child.
//
// The rule is the same one used for function headers, so the work is
// delegated to extractFunctionSignature.
func (c *Chunker) extractClassSignature(node *sitter.Node, source []byte, sourceLines []string) string {
	return c.extractFunctionSignature(node, source, sourceLines)
}
// extractDocstring returns the docstring of a function or class node, or ""
// when it has none.
//
// A docstring is a string literal that forms the first statement of the
// body. Only that first statement is examined (comment nodes before it are
// skipped); a string expression appearing later in the body is ordinary code
// and is not reported. Leading and trailing quote characters and surrounding
// whitespace are stripped from the literal.
// NOTE(review): string prefixes such as r"..." are not stripped — the
// leading letter stops the quote trimming on that side.
func (c *Chunker) extractDocstring(node *sitter.Node, source []byte) string {
	// Find the body block.
	var body *sitter.Node
	for i := 0; i < int(node.ChildCount()); i++ {
		if child := node.Child(i); child.Type() == "block" {
			body = child
			break
		}
	}
	if body == nil {
		return ""
	}

	for i := 0; i < int(body.ChildCount()); i++ {
		stmt := body.Child(i)
		if stmt.Type() == "comment" {
			continue
		}
		// First real statement: it is either the docstring or there is none.
		if stmt.Type() != "expression_statement" {
			return ""
		}
		for j := 0; j < int(stmt.ChildCount()); j++ {
			if lit := stmt.Child(j); lit.Type() == "string" {
				text := strings.Trim(lit.Content(source), `"'`)
				return strings.TrimSpace(text)
			}
		}
		return ""
	}
	return ""
}
// extractLines joins lines start..end (1-indexed, both inclusive) with "\n".
// An end beyond the last line is clamped to it; a start below 1, a start
// past the last line, or an end before start yields "".
func (c *Chunker) extractLines(lines []string, start, end int) string {
	if start < 1 {
		return ""
	}
	if end < start {
		return ""
	}
	if start > len(lines) {
		return ""
	}

	selected := lines[start-1:]
	// Number of lines requested, capped at what is available from start.
	if want := end - start + 1; want < len(selected) {
		selected = selected[:want]
	}
	return strings.Join(selected, "\n")
}
-298
View File
@@ -1,298 +0,0 @@
package python
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
)
// =============================================================================
// TEST HELPERS
// =============================================================================
// createTempPythonFile writes content to test.py inside a fresh per-test
// temp directory and returns the file's path. The test is aborted if the
// file cannot be written.
func createTempPythonFile(t *testing.T, content string) string {
	t.Helper()

	filePath := filepath.Join(t.TempDir(), "test.py")
	require.NoError(t, os.WriteFile(filePath, []byte(content), 0600))
	return filePath
}
// =============================================================================
// TESTS FOR Chunker
// =============================================================================
func TestNewChunker(t *testing.T) {
t.Parallel()
opts := chunking.DefaultChunkOptions()
c := NewChunker(opts)
assert.NotNil(t, c)
assert.NotNil(t, c.parser)
}
// TestChunker_Language checks that the chunker identifies itself as Python.
func TestChunker_Language(t *testing.T) {
	t.Parallel()

	got := NewChunker(chunking.DefaultChunkOptions()).Language()
	assert.Equal(t, chunking.LanguagePython, got)
}
// TestChunker_SupportedExtensions checks that ".py" is among the extensions
// the chunker claims.
func TestChunker_SupportedExtensions(t *testing.T) {
	t.Parallel()

	chunker := NewChunker(chunking.DefaultChunkOptions())
	assert.Contains(t, chunker.SupportedExtensions(), ".py")
}
// TestChunker_Chunk_SimpleFunction chunks a file with one documented
// function and checks that a "greet" chunk is reported as a Python function
// whose content contains the def line.
// NOTE(review): the Python fixture appears without indentation in this view —
// confirm the original file's whitespace before editing it.
func TestChunker_Chunk_SimpleFunction(t *testing.T) {
t.Parallel()
code := `def greet(name):
"""Greets a person by name."""
return f"Hello, {name}!"
`
filePath := createTempPythonFile(t, code)
c := NewChunker(chunking.DefaultChunkOptions())
chunks, err := c.Chunk(context.Background(), filePath)
require.NoError(t, err)
require.NotEmpty(t, chunks)
// Should find the greet function
var foundGreet bool
for _, chunk := range chunks {
if chunk.Name == "greet" {
foundGreet = true
assert.Equal(t, chunking.ChunkTypeFunction, chunk.Type)
assert.Equal(t, chunking.LanguagePython, chunk.Language)
assert.Contains(t, chunk.Content, "def greet")
}
}
assert.True(t, foundGreet, "Should find 'greet' function")
}
// TestChunker_Chunk_ClassWithMethods chunks a class with two methods and
// checks that the class is reported as a class chunk and each method as a
// method chunk whose ParentName is the class name.
// NOTE(review): the Python fixture appears without indentation in this view —
// confirm the original file's whitespace before editing it.
func TestChunker_Chunk_ClassWithMethods(t *testing.T) {
t.Parallel()
code := `class Calculator:
"""A simple calculator class."""
def add(self, a, b):
"""Adds two numbers."""
return a + b
def multiply(self, a, b):
"""Multiplies two numbers."""
return a * b
`
filePath := createTempPythonFile(t, code)
c := NewChunker(chunking.DefaultChunkOptions())
chunks, err := c.Chunk(context.Background(), filePath)
require.NoError(t, err)
require.NotEmpty(t, chunks)
// Should find the Calculator class and its methods
var foundClass, foundAdd, foundMultiply bool
for _, chunk := range chunks {
switch chunk.Name {
case "Calculator":
foundClass = true
assert.Equal(t, chunking.ChunkTypeClass, chunk.Type)
case "add":
foundAdd = true
assert.Equal(t, chunking.ChunkTypeMethod, chunk.Type)
assert.Equal(t, "Calculator", chunk.ParentName)
case "multiply":
foundMultiply = true
assert.Equal(t, chunking.ChunkTypeMethod, chunk.Type)
assert.Equal(t, "Calculator", chunk.ParentName)
}
}
assert.True(t, foundClass, "Should find 'Calculator' class")
assert.True(t, foundAdd, "Should find 'add' method")
assert.True(t, foundMultiply, "Should find 'multiply' method")
}
// TestChunker_Chunk_MultipleFunctions chunks a file with three module-level
// functions and checks that each one is reported as a function chunk.
// NOTE(review): the Python fixture appears without indentation in this view —
// confirm the original file's whitespace before editing it.
func TestChunker_Chunk_MultipleFunctions(t *testing.T) {
t.Parallel()
code := `def first_function():
pass
def second_function(x, y):
return x + y
def third_function():
"""Has a docstring."""
return 42
`
filePath := createTempPythonFile(t, code)
c := NewChunker(chunking.DefaultChunkOptions())
chunks, err := c.Chunk(context.Background(), filePath)
require.NoError(t, err)
// Should find all three functions
// Only chunks typed as functions are collected; a missing name reads as false.
functionNames := make(map[string]bool)
for _, chunk := range chunks {
if chunk.Type == chunking.ChunkTypeFunction {
functionNames[chunk.Name] = true
}
}
assert.True(t, functionNames["first_function"])
assert.True(t, functionNames["second_function"])
assert.True(t, functionNames["third_function"])
}
// TestChunker_Chunk_FileNotFound checks that chunking a path that does not
// exist fails with an error mentioning the read step.
func TestChunker_Chunk_FileNotFound(t *testing.T) {
	t.Parallel()

	const missing = "/nonexistent/path/file.py"
	chunker := NewChunker(chunking.DefaultChunkOptions())

	_, err := chunker.Chunk(context.Background(), missing)

	require.Error(t, err)
	assert.Contains(t, err.Error(), "read file")
}
// TestChunker_Chunk_EmptyFile verifies that an empty Python file parses
// without error and yields no chunks.
func TestChunker_Chunk_EmptyFile(t *testing.T) {
	t.Parallel()

	path := createTempPythonFile(t, "")
	chunker := NewChunker(chunking.DefaultChunkOptions())

	got, chunkErr := chunker.Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	assert.Empty(t, got)
}
// TestChunker_Chunk_OnlyComments verifies that a file containing only
// comments and a module docstring produces no chunks.
func TestChunker_Chunk_OnlyComments(t *testing.T) {
	t.Parallel()

	const src = `# This is a comment
# Another comment
"""
This is a module docstring
"""
`
	path := createTempPythonFile(t, src)

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)

	// Neither comments nor a bare docstring define any code unit.
	assert.Empty(t, got)
}
// TestChunker_Chunk_NestedClass verifies that a class containing a nested
// class is still reported; only the outer class is required to be present.
func TestChunker_Chunk_NestedClass(t *testing.T) {
	t.Parallel()

	const src = `class Outer:
class Inner:
def inner_method(self):
pass
def outer_method(self):
pass
`
	path := createTempPythonFile(t, src)

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Only the outermost class is a hard requirement here.
	sawOuter := false
	for i := range got {
		if got[i].Name == "Outer" {
			sawOuter = true
			break
		}
	}
	assert.True(t, sawOuter, "Should find 'Outer' class")
}
// TestChunker_Chunk_Decorators verifies that decorated functions are still
// discovered by name, whatever chunk type they are given.
func TestChunker_Chunk_Decorators(t *testing.T) {
	t.Parallel()

	const src = `@staticmethod
def static_func():
pass
@classmethod
def class_func(cls):
pass
@property
def my_property(self):
return self._value
`
	path := createTempPythonFile(t, src)

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Record every chunk name; the chunk type is deliberately not checked.
	seen := map[string]bool{}
	for i := range got {
		seen[got[i].Name] = true
	}

	for _, want := range []string{"static_func", "class_func", "my_property"} {
		assert.True(t, seen[want])
	}
}
// TestChunker_Chunk_AsyncFunction verifies that `async def` functions are
// discovered by name.
func TestChunker_Chunk_AsyncFunction(t *testing.T) {
	t.Parallel()

	const src = `async def fetch_data(url):
"""Fetches data from URL asynchronously."""
pass
async def process_items(items):
for item in items:
await process(item)
`
	path := createTempPythonFile(t, src)

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Record every chunk name; the chunk type is deliberately not checked.
	seen := map[string]bool{}
	for i := range got {
		seen[got[i].Name] = true
	}

	for _, want := range []string{"fetch_data", "process_items"} {
		assert.True(t, seen[want])
	}
}
-140
View File
@@ -1,140 +0,0 @@
// Package chunking provides AST-aware code chunking for semantic code search.
// Chunks code files into logical units (functions, classes, methods) that preserve
// semantic boundaries for better vector embedding and retrieval.
package chunking
import (
"context"
"fmt"
"strings"
)
// ChunkType represents the type of code chunk.
// It is a string-backed enumeration; the known values are the ChunkType*
// constants declared in the block below.
type ChunkType string

const (
	// ChunkTypeFunction represents a standalone function.
	ChunkTypeFunction ChunkType = "function"
	// ChunkTypeMethod represents a method on a class/struct/type.
	ChunkTypeMethod ChunkType = "method"
	// ChunkTypeClass represents a class or struct definition.
	ChunkTypeClass ChunkType = "class"
	// ChunkTypeInterface represents an interface definition.
	ChunkTypeInterface ChunkType = "interface"
	// ChunkTypeType represents a type alias or type definition.
	ChunkTypeType ChunkType = "type"
	// ChunkTypeConst represents constant declarations.
	ChunkTypeConst ChunkType = "const"
	// ChunkTypeVar represents variable declarations.
	ChunkTypeVar ChunkType = "var"
)
// Language represents a programming language.
// It is a string-backed enumeration; the known values are the Language*
// constants declared in the block below.
type Language string

const (
	// LanguageGo represents the Go programming language.
	LanguageGo Language = "go"
	// LanguagePython represents the Python programming language.
	LanguagePython Language = "python"
	// LanguageTypeScript represents the TypeScript programming language.
	LanguageTypeScript Language = "typescript"
	// LanguageJavaScript represents the JavaScript programming language.
	// NOTE(review): the TypeScript chunker accepts .js/.jsx files but labels
	// its chunks LanguageTypeScript; confirm whether this value is ever emitted.
	LanguageJavaScript Language = "javascript"
)
// Chunk represents a semantic code chunk with AST-derived boundaries.
type Chunk struct {
	// Metadata carries arbitrary extra key/value data attached to the chunk.
	Metadata map[string]interface{}
	// FilePath is the path of the source file the chunk was extracted from.
	FilePath string
	// Language is the language the producing chunker assigned to the chunk.
	Language Language
	// Type classifies the chunk (function, method, class, ...).
	Type ChunkType
	// Name is the declared identifier of the chunk.
	Name string
	// ParentName is the enclosing declaration's name (e.g. the class of a
	// method); empty for top-level chunks. See Identifier.
	ParentName string
	// Content is the source text covered by the chunk.
	Content string
	// Signature is the declaration header without the body, when available.
	Signature string
	// DocComment is the documentation comment attached to the declaration,
	// when available.
	DocComment string
	// StartLine is the first source line of the chunk; chunkers report it
	// 1-indexed.
	StartLine int
	// EndLine is the last source line of the chunk (inclusive).
	EndLine int
}
// Identifier returns a human-readable identifier for this chunk.
// Top-level chunks are identified by Name alone; chunks with a parent are
// rendered as "ParentName.Name".
func (c *Chunk) Identifier() string {
	if c.ParentName == "" {
		return c.Name
	}
	return fmt.Sprintf("%s.%s", c.ParentName, c.Name)
}
// LineRange returns a human-readable line range for the chunk.
// The result has the form "L<StartLine>-L<EndLine>", e.g. "L123-L456".
func (c *Chunk) LineRange() string {
	first, last := c.StartLine, c.EndLine
	return fmt.Sprintf("L%d-L%d", first, last)
}
// SearchableContent returns content optimized for semantic search.
// It concatenates the signature, the doc comment and the content, in that
// order, skipping empty fields and separating the rest with a blank line.
func (c *Chunk) SearchableContent() string {
	sections := make([]string, 0, 3)
	for _, section := range [...]string{c.Signature, c.DocComment, c.Content} {
		if section == "" {
			continue // omit absent fields so no stray separators appear
		}
		sections = append(sections, section)
	}
	return strings.Join(sections, "\n\n")
}
// Chunker is the interface for language-specific code chunkers.
// Each implementation handles one Language and a fixed set of file extensions.
type Chunker interface {
	// Chunk parses a source file and returns semantic code chunks.
	// filePath is the path of the file to read; ctx is handed to the
	// implementation (what it is used for is implementation-specific).
	// Returns an error if the file cannot be parsed or read.
	Chunk(ctx context.Context, filePath string) ([]Chunk, error)
	// Language returns the language this chunker supports.
	Language() Language
	// SupportedExtensions returns file extensions this chunker handles,
	// including the leading dot.
	// Example: []string{".go"} for Go chunker
	SupportedExtensions() []string
}
// ChunkOptions provides options for chunking behavior.
// Use DefaultChunkOptions for a ready-made configuration.
// NOTE(review): enforcement is left to each Chunker implementation; the
// TypeScript chunker only appears to consult IncludeDocComments and
// IncludePrivate — confirm whether MaxChunkSize and MinLines are applied.
type ChunkOptions struct {
	// MaxChunkSize is the maximum size of a chunk in bytes.
	// Chunks larger than this will be split (respecting boundaries where possible).
	// 0 means no limit.
	MaxChunkSize int
	// IncludeDocComments controls whether to include documentation comments.
	IncludeDocComments bool
	// IncludePrivate controls whether to include private/unexported symbols.
	IncludePrivate bool
	// MinLines is the minimum number of lines for a chunk to be included.
	// Chunks smaller than this will be skipped.
	// 0 means no minimum.
	MinLines int
}
// DefaultChunkOptions returns sensible default options: an 8192-byte chunk
// limit, doc comments and private symbols included, and no minimum line count.
func DefaultChunkOptions() ChunkOptions {
	var opts ChunkOptions

	opts.MaxChunkSize = 8192       // ~8KB per chunk (well under token limit)
	opts.IncludeDocComments = true // keep documentation with the code
	opts.IncludePrivate = true     // include all symbols for comprehensive search
	opts.MinLines = 0              // no minimum - include even single-line functions

	return opts
}
-213
View File
@@ -1,213 +0,0 @@
package chunking
import (
"testing"
"github.com/stretchr/testify/assert"
)
// =============================================================================
// TESTS FOR Chunk METHODS
// =============================================================================
// TestChunk_Identifier checks the "Parent.Name" / "Name" rendering of
// Chunk.Identifier, including the degenerate empty-name cases.
func TestChunk_Identifier(t *testing.T) {
	cases := []struct {
		name     string
		expected string
		chunk    Chunk
	}{
		// Regular inputs.
		{name: "top-level function", chunk: Chunk{Name: "MyFunction", ParentName: ""}, expected: "MyFunction"},
		{name: "method with parent", chunk: Chunk{Name: "Process", ParentName: "Handler"}, expected: "Handler.Process"},
		{name: "nested method", chunk: Chunk{Name: "Validate", ParentName: "UserService"}, expected: "UserService.Validate"},
		// Degenerate inputs.
		{name: "empty name", chunk: Chunk{Name: "", ParentName: ""}, expected: ""},
		{name: "parent but no name", chunk: Chunk{Name: "", ParentName: "Parent"}, expected: "Parent."},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.expected, tc.chunk.Identifier())
		})
	}
}
// TestChunk_LineRange checks the "L<start>-L<end>" rendering of
// Chunk.LineRange for single-line, multi-line and large ranges.
func TestChunk_LineRange(t *testing.T) {
	cases := []struct {
		name     string
		expected string
		chunk    Chunk
	}{
		// Regular inputs.
		{name: "single line", chunk: Chunk{StartLine: 10, EndLine: 10}, expected: "L10-L10"},
		{name: "multi-line", chunk: Chunk{StartLine: 25, EndLine: 50}, expected: "L25-L50"},
		// Boundary inputs.
		{name: "line 1", chunk: Chunk{StartLine: 1, EndLine: 5}, expected: "L1-L5"},
		{name: "large line numbers", chunk: Chunk{StartLine: 1000, EndLine: 2500}, expected: "L1000-L2500"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.expected, tc.chunk.LineRange())
		})
	}
}
// TestChunk_SearchableContent checks that Chunk.SearchableContent includes
// every populated field (signature, doc comment, content) in its output and
// that a chunk with no populated fields yields an empty string.
func TestChunk_SearchableContent(t *testing.T) {
	tests := []struct {
		name     string
		contains []string
		chunk    Chunk
	}{
		// ===== GOOD CASES =====
		{
			name: "full chunk with all fields",
			chunk: Chunk{
				Signature:  "func ProcessData(input []byte) error",
				DocComment: "// ProcessData handles incoming data",
				Content:    "func ProcessData(input []byte) error {\n\treturn nil\n}",
			},
			contains: []string{
				"func ProcessData(input []byte) error",
				"ProcessData handles incoming data",
				"return nil",
			},
		},
		{
			name: "only signature",
			chunk: Chunk{
				Signature: "func Hello()",
			},
			contains: []string{"func Hello()"},
		},
		{
			name: "only content",
			chunk: Chunk{
				Content: "some code here",
			},
			contains: []string{"some code here"},
		},
		// ===== EDGE CASES =====
		{
			name:     "empty chunk",
			chunk:    Chunk{},
			contains: []string{},
		},
		{
			name: "only doc comment",
			chunk: Chunk{
				DocComment: "// Important documentation",
			},
			contains: []string{"Important documentation"},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tt.chunk.SearchableContent()
			if len(tt.contains) == 0 {
				// With nothing expected, a Contains loop would assert nothing
				// at all; require an empty result so the case is not vacuous.
				assert.Empty(t, result)
				return
			}
			for _, expected := range tt.contains {
				assert.Contains(t, result, expected)
			}
		})
	}
}
// TestDefaultChunkOptions pins the documented defaults returned by
// DefaultChunkOptions.
func TestDefaultChunkOptions(t *testing.T) {
	defaults := DefaultChunkOptions()

	assert.Greater(t, defaults.MaxChunkSize, 0, "MaxChunkSize should be positive")
	assert.True(t, defaults.IncludeDocComments, "IncludeDocComments should be true by default")
	assert.True(t, defaults.IncludePrivate, "IncludePrivate should be true by default")
	assert.Equal(t, 0, defaults.MinLines, "MinLines should be 0 by default")
}
// =============================================================================
// TESTS FOR ChunkType AND Language CONSTANTS
// =============================================================================
func TestChunkType_Values(t *testing.T) {
// Ensure all chunk types have expected values
assert.Equal(t, ChunkType("function"), ChunkTypeFunction)
assert.Equal(t, ChunkType("method"), ChunkTypeMethod)
assert.Equal(t, ChunkType("class"), ChunkTypeClass)
assert.Equal(t, ChunkType("interface"), ChunkTypeInterface)
assert.Equal(t, ChunkType("type"), ChunkTypeType)
assert.Equal(t, ChunkType("const"), ChunkTypeConst)
assert.Equal(t, ChunkType("var"), ChunkTypeVar)
}
func TestLanguage_Values(t *testing.T) {
// Ensure all language types have expected values
assert.Equal(t, Language("go"), LanguageGo)
assert.Equal(t, Language("python"), LanguagePython)
assert.Equal(t, Language("typescript"), LanguageTypeScript)
assert.Equal(t, Language("javascript"), LanguageJavaScript)
}
-403
View File
@@ -1,403 +0,0 @@
// Package typescript provides AST-aware chunking for TypeScript and JavaScript source files using tree-sitter.
package typescript
import (
"context"
"fmt"
"os"
"strings"
sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/typescript/typescript"
"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
)
// Chunker implements AST-aware chunking for TypeScript/JavaScript files.
// Construct it with NewChunker, which installs the TypeScript grammar on the
// parser.
type Chunker struct {
	// parser is the tree-sitter parser used by Chunk to parse file contents.
	parser *sitter.Parser
	// options controls doc-comment extraction and private-method filtering.
	options chunking.ChunkOptions
}
// NewChunker creates a new TypeScript chunker.
// The returned Chunker owns a fresh tree-sitter parser configured with the
// TypeScript grammar and keeps a copy of options.
func NewChunker(options chunking.ChunkOptions) *Chunker {
	c := &Chunker{
		parser:  sitter.NewParser(),
		options: options,
	}
	c.parser.SetLanguage(typescript.GetLanguage())
	return c
}
// Language returns the language this chunker supports.
// It is always chunking.LanguageTypeScript, including for JavaScript inputs.
func (c *Chunker) Language() chunking.Language {
	const supported = chunking.LanguageTypeScript
	return supported
}
// SupportedExtensions returns the file extensions this chunker handles.
// A new slice is returned on every call, so callers may modify it freely.
func (c *Chunker) SupportedExtensions() []string {
	exts := []string{
		".ts",
		".tsx",
		".js",
		".jsx",
	}
	return exts
}
// Chunk parses a TypeScript/JavaScript source file and returns semantic code chunks.
//
// The file at filePath is read in full, parsed with the chunker's tree-sitter
// parser (ctx is passed to the parse call), and the resulting tree is walked
// to collect chunks. It returns a wrapped error when the file cannot be read
// or parsed; on success the slice is non-nil, possibly empty.
func (c *Chunker) Chunk(ctx context.Context, filePath string) ([]chunking.Chunk, error) {
	src, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return nil, fmt.Errorf("read file: %w", readErr)
	}

	tree, parseErr := c.parser.ParseCtx(ctx, nil, src)
	if parseErr != nil {
		return nil, fmt.Errorf("parse TypeScript file: %w", parseErr)
	}
	defer tree.Close()

	var (
		lines  = strings.Split(string(src), "\n") // line table for content extraction
		result = []chunking.Chunk{}
	)

	// Collect chunks from the whole tree; the root has no parent name.
	c.walkNode(tree.RootNode(), src, lines, filePath, "", &result)
	return result, nil
}
// walkNode recursively walks the tree-sitter AST and extracts chunks.
//
// Recognised declarations are appended to *chunks. A class declaration is
// handled specially: its class_body children are walked with the class name
// as parentName, and no other children of the class node are visited. Every
// other node has all of its children walked with the incoming parentName.
func (c *Chunker) walkNode(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string, chunks *[]chunking.Chunk) {
	// record appends an extracted chunk, reporting whether there was one.
	record := func(found *chunking.Chunk) bool {
		if found == nil {
			return false
		}
		*chunks = append(*chunks, *found)
		return true
	}

	childCount := int(node.ChildCount())

	switch node.Type() {
	case "class_declaration":
		class := c.extractClass(node, source, sourceLines, filePath)
		if record(class) {
			// Descend only into the class body so methods get the class as parent.
			for i := 0; i < childCount; i++ {
				if body := node.Child(i); body.Type() == "class_body" {
					c.walkNode(body, source, sourceLines, filePath, class.Name, chunks)
				}
			}
		}
		return // the generic child walk below must not run for classes

	case "function_declaration":
		record(c.extractFunction(node, source, sourceLines, filePath, parentName))

	case "method_definition":
		record(c.extractMethod(node, source, sourceLines, filePath, parentName))

	case "arrow_function", "function_expression":
		// Only functions bound to a name produce a chunk.
		record(c.extractFunctionExpression(node, source, sourceLines, filePath, parentName))

	case "interface_declaration":
		record(c.extractInterface(node, source, sourceLines, filePath))

	case "type_alias_declaration":
		record(c.extractTypeAlias(node, source, sourceLines, filePath))
	}

	// Generic descent: visit every child with the unchanged parent name.
	for i := 0; i < childCount; i++ {
		c.walkNode(node.Child(i), source, sourceLines, filePath, parentName, chunks)
	}
}
// extractFunction extracts a function declaration.
// It returns nil when the node has no direct identifier child (no name).
// The doc comment is filled in only when IncludeDocComments is set.
func (c *Chunker) extractFunction(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk {
	name := c.findChildContent(node, "identifier", source)
	if name == "" {
		return nil
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	first, last := int(node.StartPoint().Row)+1, int(node.EndPoint().Row)+1

	var doc string
	if c.options.IncludeDocComments {
		doc = c.extractComment(node, source)
	}

	return &chunking.Chunk{
		FilePath:   filePath,
		Language:   chunking.LanguageTypeScript,
		Type:       chunking.ChunkTypeFunction,
		Name:       name,
		ParentName: parentName,
		StartLine:  first,
		EndLine:    last,
		Content:    c.extractLines(sourceLines, first, last),
		Signature:  c.extractFunctionSignature(node, source, sourceLines),
		DocComment: doc,
	}
}
// extractMethod extracts a method definition from a class.
// It returns nil when the node has no property_identifier child, or when the
// name starts with "_" and IncludePrivate is disabled.
func (c *Chunker) extractMethod(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk {
	name := c.findChildContent(node, "property_identifier", source)
	switch {
	case name == "":
		return nil
	case strings.HasPrefix(name, "_") && !c.options.IncludePrivate:
		// Underscore-prefixed methods are treated as private by convention.
		return nil
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	first, last := int(node.StartPoint().Row)+1, int(node.EndPoint().Row)+1

	var doc string
	if c.options.IncludeDocComments {
		doc = c.extractComment(node, source)
	}

	return &chunking.Chunk{
		FilePath:   filePath,
		Language:   chunking.LanguageTypeScript,
		Type:       chunking.ChunkTypeMethod,
		Name:       name,
		ParentName: parentName,
		StartLine:  first,
		EndLine:    last,
		Content:    c.extractLines(sourceLines, first, last),
		Signature:  c.extractMethodSignature(node, source, sourceLines),
		DocComment: doc,
	}
}
// extractFunctionExpression extracts arrow functions and function expressions.
// The chunk name is taken from the parent node: the identifier of a
// variable_declarator, or the first identifier/member_expression child of an
// assignment_expression. Functions with no such name yield nil. Neither a
// signature nor a doc comment is recorded for these chunks.
func (c *Chunker) extractFunctionExpression(node *sitter.Node, source []byte, sourceLines []string, filePath string, parentName string) *chunking.Chunk {
	owner := node.Parent()
	if owner == nil {
		return nil
	}

	name := ""
	switch owner.Type() {
	case "variable_declarator":
		// e.g. const foo = () => {}
		name = c.findChildContent(owner, "identifier", source)
	case "assignment_expression":
		// e.g. foo = () => {} or obj.foo = function () {}
		for i := 0; i < int(owner.ChildCount()); i++ {
			target := owner.Child(i)
			if kind := target.Type(); kind == "identifier" || kind == "member_expression" {
				name = target.Content(source)
				break
			}
		}
	}
	if name == "" {
		return nil // anonymous function, nothing to index
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	first, last := int(node.StartPoint().Row)+1, int(node.EndPoint().Row)+1

	return &chunking.Chunk{
		FilePath:   filePath,
		Language:   chunking.LanguageTypeScript,
		Type:       chunking.ChunkTypeFunction,
		Name:       name,
		ParentName: parentName,
		StartLine:  first,
		EndLine:    last,
		Content:    c.extractLines(sourceLines, first, last),
	}
}
// extractClass extracts a class declaration.
// It returns nil when the node has no type_identifier child (no name).
// The doc comment is filled in only when IncludeDocComments is set.
func (c *Chunker) extractClass(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk {
	name := c.findChildContent(node, "type_identifier", source)
	if name == "" {
		return nil
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	first, last := int(node.StartPoint().Row)+1, int(node.EndPoint().Row)+1

	var doc string
	if c.options.IncludeDocComments {
		doc = c.extractComment(node, source)
	}

	return &chunking.Chunk{
		FilePath:   filePath,
		Language:   chunking.LanguageTypeScript,
		Type:       chunking.ChunkTypeClass,
		Name:       name,
		StartLine:  first,
		EndLine:    last,
		Content:    c.extractLines(sourceLines, first, last),
		Signature:  c.extractClassSignature(node, source, sourceLines),
		DocComment: doc,
	}
}
// extractInterface extracts an interface declaration.
// It returns nil when the node has no type_identifier child (no name).
// No signature is recorded; the doc comment is filled in only when
// IncludeDocComments is set.
func (c *Chunker) extractInterface(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk {
	name := c.findChildContent(node, "type_identifier", source)
	if name == "" {
		return nil
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	first, last := int(node.StartPoint().Row)+1, int(node.EndPoint().Row)+1

	var doc string
	if c.options.IncludeDocComments {
		doc = c.extractComment(node, source)
	}

	return &chunking.Chunk{
		FilePath:   filePath,
		Language:   chunking.LanguageTypeScript,
		Type:       chunking.ChunkTypeInterface,
		Name:       name,
		StartLine:  first,
		EndLine:    last,
		Content:    c.extractLines(sourceLines, first, last),
		DocComment: doc,
	}
}
// extractTypeAlias extracts a type alias declaration.
// It returns nil when the node has no type_identifier child (no name).
// Neither a signature nor a doc comment is recorded for type aliases.
func (c *Chunker) extractTypeAlias(node *sitter.Node, source []byte, sourceLines []string, filePath string) *chunking.Chunk {
	name := c.findChildContent(node, "type_identifier", source)
	if name == "" {
		return nil
	}

	// tree-sitter rows are 0-based; chunk lines are 1-based.
	first, last := int(node.StartPoint().Row)+1, int(node.EndPoint().Row)+1

	return &chunking.Chunk{
		FilePath:  filePath,
		Language:  chunking.LanguageTypeScript,
		Type:      chunking.ChunkTypeType,
		Name:      name,
		StartLine: first,
		EndLine:   last,
		Content:   c.extractLines(sourceLines, first, last),
	}
}
// findChildContent finds the first child of the given type and returns its content.
// Only direct children of node are inspected; "" is returned when none matches.
func (c *Chunker) findChildContent(node *sitter.Node, childType string, source []byte) string {
	for i, total := 0, int(node.ChildCount()); i < total; i++ {
		if candidate := node.Child(i); candidate.Type() == childType {
			return candidate.Content(source)
		}
	}
	return ""
}
// extractFunctionSignature extracts the function signature: the text of the
// declaration up to (not including) its statement_block body. When the node
// has no body child, the first line of the declaration is returned instead.
func (c *Chunker) extractFunctionSignature(node *sitter.Node, source []byte, sourceLines []string) string {
	return c.signatureBefore(node, "statement_block", source, sourceLines)
}

// extractMethodSignature extracts the method signature: the text of the
// method definition up to (not including) its statement_block body. When the
// node has no body child, the first line of the definition is returned.
func (c *Chunker) extractMethodSignature(node *sitter.Node, source []byte, sourceLines []string) string {
	return c.signatureBefore(node, "statement_block", source, sourceLines)
}

// extractClassSignature extracts the class declaration header: the text of
// the declaration up to (not including) its class_body. When the node has no
// class_body child, the first line of the declaration is returned.
func (c *Chunker) extractClassSignature(node *sitter.Node, source []byte, sourceLines []string) string {
	return c.signatureBefore(node, "class_body", source, sourceLines)
}

// signatureBefore returns the trimmed source text between the start of node
// and the start of its first direct child of type bodyType.
//
// The text is sliced by byte offset rather than by whole lines: a line-based
// cut yields an empty signature whenever the body's opening brace sits on the
// same line as the declaration (e.g. "function f() {"), which is the usual
// layout. If no bodyType child exists, or the offsets do not fit source, the
// first line of node is returned as a fallback.
func (c *Chunker) signatureBefore(node *sitter.Node, bodyType string, source []byte, sourceLines []string) string {
	for i := 0; i < int(node.ChildCount()); i++ {
		child := node.Child(i)
		if child.Type() != bodyType {
			continue
		}
		from, to := int(node.StartByte()), int(child.StartByte())
		if from <= to && to <= len(source) {
			return strings.TrimSpace(string(source[from:to]))
		}
		break // offsets inconsistent with source; use the fallback below
	}

	// Fallback: just return the first line of the declaration.
	startLine := int(node.StartPoint().Row) + 1
	return strings.TrimSpace(c.extractLines(sourceLines, startLine, startLine))
}
// extractComment extracts JSDoc or other comments from a node.
// Only the node's immediately preceding sibling is considered; if it is a
// comment, its text is returned with the outer comment markers and the
// surrounding whitespace removed. Otherwise "" is returned.
func (c *Chunker) extractComment(node *sitter.Node, source []byte) string {
	prev := node.PrevSibling()
	if prev == nil || prev.Type() != "comment" {
		return ""
	}

	text := prev.Content(source)
	// Strip block-comment openers (JSDoc first, then plain), then the closer.
	for _, opener := range []string{"/**", "/*"} {
		text = strings.TrimPrefix(text, opener)
	}
	text = strings.TrimSuffix(text, "*/")
	// Finally handle single-line comments.
	text = strings.TrimPrefix(text, "//")

	return strings.TrimSpace(text)
}
// extractLines extracts a range of lines from source (1-indexed, inclusive).
// It returns "" when start is below 1, beyond the last line, or after end.
// An end past the last line is clamped to the last line. The selected lines
// are joined with "\n".
func (c *Chunker) extractLines(lines []string, start, end int) string {
	total := len(lines)

	inRange := start >= 1 && end >= start && start <= total
	if !inRange {
		return ""
	}

	stop := end // exclusive slice bound equals the inclusive 1-based end
	if stop > total {
		stop = total
	}
	return strings.Join(lines[start-1:stop], "\n")
}
@@ -1,398 +0,0 @@
package typescript
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lukaszraczylo/claude-mnemonic/internal/chunking"
)
// =============================================================================
// TEST HELPERS
// =============================================================================
// createTempTSFile writes content to a file named "test"+ext inside a fresh
// per-test temporary directory and returns its path. The test fails
// immediately if the file cannot be written.
func createTempTSFile(t *testing.T, content string, ext string) string {
	t.Helper()

	target := filepath.Join(t.TempDir(), "test"+ext)
	require.NoError(t, os.WriteFile(target, []byte(content), 0600))
	return target
}
// =============================================================================
// TESTS FOR Chunker
// =============================================================================
func TestNewChunker(t *testing.T) {
t.Parallel()
opts := chunking.DefaultChunkOptions()
c := NewChunker(opts)
assert.NotNil(t, c)
assert.NotNil(t, c.parser)
}
// TestChunker_Language verifies that the chunker reports TypeScript as its
// language.
func TestChunker_Language(t *testing.T) {
	t.Parallel()

	got := NewChunker(chunking.DefaultChunkOptions()).Language()
	assert.Equal(t, chunking.LanguageTypeScript, got)
}
// TestChunker_SupportedExtensions verifies that the TypeScript and JavaScript
// extensions, including their JSX variants, are all advertised.
func TestChunker_SupportedExtensions(t *testing.T) {
	t.Parallel()

	got := NewChunker(chunking.DefaultChunkOptions()).SupportedExtensions()

	for _, ext := range []string{".ts", ".tsx", ".js", ".jsx"} {
		assert.Contains(t, got, ext)
	}
}
// TestChunker_Chunk_SimpleFunction verifies that a single function
// declaration is reported as a TypeScript function chunk with its source.
func TestChunker_Chunk_SimpleFunction(t *testing.T) {
	t.Parallel()

	const src = `function greet(name: string): string {
return "Hello, " + name + "!";
}
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Check every chunk named "greet" and remember that one was seen.
	sawGreet := false
	for i := range got {
		ch := got[i]
		if ch.Name != "greet" {
			continue
		}
		sawGreet = true
		assert.Equal(t, chunking.ChunkTypeFunction, ch.Type)
		assert.Equal(t, chunking.LanguageTypeScript, ch.Language)
		assert.Contains(t, ch.Content, "function greet")
	}
	assert.True(t, sawGreet, "Should find 'greet' function")
}
// TestChunker_Chunk_ClassWithMethods verifies that a class is reported as a
// class chunk and its methods as method chunks parented to that class.
func TestChunker_Chunk_ClassWithMethods(t *testing.T) {
	t.Parallel()

	const src = `class Calculator {
add(a: number, b: number): number {
return a + b;
}
multiply(a: number, b: number): number {
return a * b;
}
}
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Validate each chunk of interest and note which names were encountered.
	seen := map[string]bool{}
	for i := range got {
		ch := got[i]
		switch ch.Name {
		case "Calculator":
			assert.Equal(t, chunking.ChunkTypeClass, ch.Type)
		case "add", "multiply":
			assert.Equal(t, chunking.ChunkTypeMethod, ch.Type)
			assert.Equal(t, "Calculator", ch.ParentName)
		default:
			continue
		}
		seen[ch.Name] = true
	}

	assert.True(t, seen["Calculator"], "Should find 'Calculator' class")
	assert.True(t, seen["add"], "Should find 'add' method")
	assert.True(t, seen["multiply"], "Should find 'multiply' method")
}
// TestChunker_Chunk_Interface verifies that interface declarations are
// reported as interface chunks.
func TestChunker_Chunk_Interface(t *testing.T) {
	t.Parallel()

	const src = `interface User {
id: number;
name: string;
email: string;
}
interface Authenticator {
login(username: string, password: string): boolean;
logout(): void;
}
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Collect the names of every chunk classified as an interface.
	seen := map[string]bool{}
	for i := range got {
		if got[i].Type != chunking.ChunkTypeInterface {
			continue
		}
		seen[got[i].Name] = true
	}

	for _, want := range []string{"User", "Authenticator"} {
		assert.True(t, seen[want])
	}
}
// TestChunker_Chunk_TypeAlias verifies that simple, function-typed and
// generic type aliases are all reported as type chunks.
func TestChunker_Chunk_TypeAlias(t *testing.T) {
	t.Parallel()

	const src = `type UserID = string;
type Handler = (event: Event) => void;
type Result<T> = { success: true; data: T } | { success: false; error: Error };
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Collect the names of every chunk classified as a type.
	seen := map[string]bool{}
	for i := range got {
		if got[i].Type != chunking.ChunkTypeType {
			continue
		}
		seen[got[i].Name] = true
	}

	for _, want := range []string{"UserID", "Handler", "Result"} {
		assert.True(t, seen[want])
	}
}
// TestChunker_Chunk_ArrowFunction verifies that arrow functions bound to
// constants parse without error. Whether they yield chunks is intentionally
// left unchecked, since that depends on the AST structure.
func TestChunker_Chunk_ArrowFunction(t *testing.T) {
	t.Parallel()

	const src = `const add = (a: number, b: number): number => a + b;
const greet = (name: string): string => {
return "Hello, " + name;
};
`
	path := createTempTSFile(t, src, ".ts")
	chunker := NewChunker(chunking.DefaultChunkOptions())

	// Success is all that is required; the returned chunks are ignored.
	_, chunkErr := chunker.Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
}
// TestChunker_Chunk_FileNotFound verifies that chunking a missing path fails
// with an error that mentions the read step.
func TestChunker_Chunk_FileNotFound(t *testing.T) {
	t.Parallel()

	const missing = "/nonexistent/path/file.ts"
	chunker := NewChunker(chunking.DefaultChunkOptions())

	_, chunkErr := chunker.Chunk(context.Background(), missing)
	require.Error(t, chunkErr)
	assert.Contains(t, chunkErr.Error(), "read file")
}
// TestChunker_Chunk_EmptyFile verifies that an empty TypeScript file parses
// without error and yields no chunks.
func TestChunker_Chunk_EmptyFile(t *testing.T) {
	t.Parallel()

	path := createTempTSFile(t, "", ".ts")
	chunker := NewChunker(chunking.DefaultChunkOptions())

	got, chunkErr := chunker.Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	assert.Empty(t, got)
}
// TestChunker_Chunk_OnlyComments verifies that a file containing only line,
// block and JSDoc comments produces no chunks.
func TestChunker_Chunk_OnlyComments(t *testing.T) {
	t.Parallel()

	const src = `// This is a comment
/* Another comment */
/**
* JSDoc comment
*/
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)

	// Comments alone define no code unit.
	assert.Empty(t, got)
}
// TestChunker_Chunk_AsyncFunction verifies that async function declarations
// are reported as function chunks.
func TestChunker_Chunk_AsyncFunction(t *testing.T) {
	t.Parallel()

	const src = `async function fetchData(url: string): Promise<any> {
const response = await fetch(url);
return response.json();
}
async function processItems(items: string[]): Promise<void> {
for (const item of items) {
await process(item);
}
}
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Collect the names of every chunk classified as a function.
	seen := map[string]bool{}
	for i := range got {
		if got[i].Type != chunking.ChunkTypeFunction {
			continue
		}
		seen[got[i].Name] = true
	}

	for _, want := range []string{"fetchData", "processItems"} {
		assert.True(t, seen[want])
	}
}
// TestChunker_Chunk_ExportedFunction verifies that named and default
// exported functions are reported as function chunks.
func TestChunker_Chunk_ExportedFunction(t *testing.T) {
	t.Parallel()

	const src = `export function publicFunction(): void {
console.log("public");
}
export default function defaultExport(): void {
console.log("default");
}
`
	path := createTempTSFile(t, src, ".ts")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Collect the names of every chunk classified as a function.
	seen := map[string]bool{}
	for i := range got {
		if got[i].Type != chunking.ChunkTypeFunction {
			continue
		}
		seen[got[i].Name] = true
	}

	for _, want := range []string{"publicFunction", "defaultExport"} {
		assert.True(t, seen[want])
	}
}
// TestChunker_Chunk_JSXFile verifies that function components in a .tsx file
// are reported as function chunks.
func TestChunker_Chunk_JSXFile(t *testing.T) {
	t.Parallel()

	const src = `function Button({ label }: { label: string }) {
return <button>{label}</button>;
}
function App() {
return (
<div>
<Button label="Click me" />
</div>
);
}
`
	path := createTempTSFile(t, src, ".tsx")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Collect the names of every chunk classified as a function.
	seen := map[string]bool{}
	for i := range got {
		if got[i].Type != chunking.ChunkTypeFunction {
			continue
		}
		seen[got[i].Name] = true
	}

	for _, want := range []string{"Button", "App"} {
		assert.True(t, seen[want])
	}
}
// TestChunker_Chunk_JavaScript verifies that plain JavaScript functions and
// classes in a .js file are discovered by name.
func TestChunker_Chunk_JavaScript(t *testing.T) {
	t.Parallel()

	const src = `function simpleFunc() {
return 42;
}
class MyClass {
constructor() {
this.value = 0;
}
getValue() {
return this.value;
}
}
`
	path := createTempTSFile(t, src, ".js")

	got, chunkErr := NewChunker(chunking.DefaultChunkOptions()).Chunk(context.Background(), path)
	require.NoError(t, chunkErr)
	require.NotEmpty(t, got)

	// Record which of the two expected names appear among the chunks.
	seen := map[string]bool{}
	for i := range got {
		switch name := got[i].Name; name {
		case "simpleFunc", "MyClass":
			seen[name] = true
		}
	}

	assert.True(t, seen["simpleFunc"], "Should find 'simpleFunc' function")
	assert.True(t, seen["MyClass"], "Should find 'MyClass' class")
}