feat(docs, ci, config): add comprehensive documentation and tooling

- [x] Add API reference documentation with tool descriptions and examples
- [x] Add ERROR_CODES reference with error descriptions and remediation steps
- [x] Add PERFORMANCE tuning guide with caching and optimization details
- [x] Add GitHub Actions workflows for linting and security scanning
- [x] Add golangci-lint configuration with comprehensive linter settings
- [x] Add pre-commit hooks configuration for local development
- [x] Add API documentation generator tool (cmd/docgen)
- [x] Update Go version from 1.24 to 1.25 across workflows
- [x] Add static build configuration to goreleaser
- [x] Add metrics package with Prometheus-style metric types
- [x] Add parser benchmarks for performance testing
- [x] Add LSP manager integration tests
- [x] Add server integration tests with MCP protocol flow testing
- [x] Extract regex cache to shared utility package
- [x] Add context cancellation handling in AST queries
- [x] Add graceful shutdown with timeout to server
- [x] Add configurable max parse size (MaxParseSize)
- [x] Add Config.Validate() method with comprehensive checks
- [x] Add parser cache statistics tracking
- [x] Add file permission preservation in edit operations
- [x] Improve line splitting for large files with bufio.Scanner
- [x] Add comprehensive config tests for edge cases
- [x] Update Makefile with new targets and documentation
This commit is contained in:
2026-01-28 20:43:20 +00:00
parent 143a166249
commit 9205b2bc26
27 changed files with 6332 additions and 1634 deletions
+101 -10
View File
@@ -5,6 +5,8 @@ import (
"context"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/cespare/xxhash/v2"
lru "github.com/hashicorp/golang-lru/v2"
@@ -22,14 +24,25 @@ import (
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
)
// MaxFileSize is the maximum file size we'll parse (10MB).
// MaxFileSize is the default maximum file size we'll parse (10MB).
// NewRegistryWithSize falls back to this value when given a non-positive size.
//
// Deprecated: Use NewRegistryWithSize to configure the limit per Registry.
const MaxFileSize = 10 * 1024 * 1024
// Registry manages Tree-sitter parsers for different languages.
// It also holds a cache of parsed trees, the maximum content size Parse
// accepts, and counters describing cache and parse activity.
type Registry struct {
parsers map[protocol.Language]*sitter.Parser
cache *lru.Cache[string, *CachedTree]
mu sync.RWMutex
parsers map[protocol.Language]*sitter.Parser
cache *lru.Cache[string, *CachedTree]
// maxParseSize is the largest content length, in bytes, that Parse accepts.
maxParseSize int64
// mu is held by Parse for the duration of the tree-sitter ParseCtx call.
mu sync.RWMutex
// Cache metrics (atomic for thread-safety)
// cacheHits is incremented when Parse returns a cached tree; cacheMisses
// is incremented when the cache lookup in Parse does not produce one.
cacheHits atomic.Int64
cacheMisses atomic.Int64
// Parse duration tracking
// Updated by Parse after every ParseCtx call, including failed ones.
totalParseTime atomic.Int64 // nanoseconds
parseCount atomic.Int64
lastParseDuration atomic.Int64 // nanoseconds
}
// CachedTree stores a parsed tree with its metadata.
@@ -54,8 +67,27 @@ type SyntaxError struct {
Location protocol.Location
}
// NewRegistry creates a new parser registry.
// CacheStatsResult contains cache statistics.
// It is the snapshot returned by Registry.CacheStatsDetailed; all durations
// are expressed in nanoseconds.
type CacheStatsResult struct {
// Hits is the number of Parse calls answered from the tree cache.
Hits int64 `json:"hits"`
// Misses is the number of Parse cache lookups that did not return a tree.
Misses int64 `json:"misses"`
// HitRate is Hits / (Hits + Misses), or 0 when no lookups were recorded.
HitRate float64 `json:"hit_rate"`
// Size is the number of entries currently held in the tree cache.
Size int `json:"size"`
// TotalParseTime is the accumulated time spent in parser calls.
TotalParseTime int64 `json:"total_parse_time_ns"`
// ParseCount is the number of parser calls that were timed.
ParseCount int64 `json:"parse_count"`
// AvgParseTime is TotalParseTime / ParseCount (integer division), or 0
// when ParseCount is 0.
AvgParseTime int64 `json:"avg_parse_time_ns"`
// LastParseTime is the duration of the most recently timed parser call.
LastParseTime int64 `json:"last_parse_time_ns"`
}
// NewRegistry returns a parser registry that uses the default maximum parse
// size. Callers that need a different limit should call NewRegistryWithSize.
func NewRegistry() *Registry {
	// A non-positive size asks NewRegistryWithSize to apply its default.
	const useDefaultLimit = 0
	return NewRegistryWithSize(useDefaultLimit)
}
// NewRegistryWithSize creates a new parser registry with the specified max parse size.
// If maxParseSize is 0 or negative, uses the default MaxFileSize constant.
func NewRegistryWithSize(maxParseSize int64) *Registry {
// Create LRU cache with capacity of 100 trees
cache, err := lru.New[string, *CachedTree](100)
if err != nil {
@@ -63,9 +95,14 @@ func NewRegistry() *Registry {
panic(fmt.Sprintf("failed to create LRU cache: %v", err))
}
if maxParseSize <= 0 {
maxParseSize = MaxFileSize
}
return &Registry{
parsers: make(map[protocol.Language]*sitter.Parser),
cache: cache,
parsers: make(map[protocol.Language]*sitter.Parser),
cache: cache,
maxParseSize: maxParseSize,
}
}
@@ -130,9 +167,9 @@ func (r *Registry) GetParser(lang protocol.Language) (*sitter.Parser, error) {
// Parse parses the given content for the specified language.
func (r *Registry) Parse(ctx context.Context, filename string, content []byte) (*ParseResult, error) {
// Check file size
if len(content) > MaxFileSize {
return nil, errors.NewFileTooLarge(filename, int64(len(content)), MaxFileSize)
// Check file size against configured limit
if int64(len(content)) > r.maxParseSize {
return nil, errors.NewFileTooLarge(filename, int64(len(content)), r.maxParseSize)
}
// Detect binary files
@@ -161,6 +198,7 @@ func (r *Registry) Parse(ctx context.Context, filename string, content []byte) (
// Check cache (LRU cache is thread-safe)
hash := contentHash(content)
if cached, ok := r.cache.Get(hash); ok && cached.Language == lang {
r.cacheHits.Add(1)
errors := extractErrors(cached.Tree.RootNode(), content)
return &ParseResult{
Tree: cached.Tree,
@@ -169,6 +207,7 @@ func (r *Registry) Parse(ctx context.Context, filename string, content []byte) (
Content: content,
}, nil
}
r.cacheMisses.Add(1)
// Get parser
parser, err := r.GetParser(lang)
@@ -178,9 +217,17 @@ func (r *Registry) Parse(ctx context.Context, filename string, content []byte) (
// Parse content - tree-sitter parsers are not thread-safe,
// so we need to hold the lock during parsing
// Track parse duration
start := time.Now()
r.mu.Lock()
tree, err := parser.ParseCtx(ctx, nil, content)
r.mu.Unlock()
duration := time.Since(start)
// Update duration metrics
r.totalParseTime.Add(duration.Nanoseconds())
r.parseCount.Add(1)
r.lastParseDuration.Store(duration.Nanoseconds())
if err != nil {
return nil, errors.NewParseError(string(lang), filename, err)
@@ -203,6 +250,50 @@ func (r *Registry) Parse(ctx context.Context, filename string, content []byte) (
}, nil
}
// CacheStats reports the current values of the registry's cache counters:
// hits is the number of lookups served from the cache, misses the number
// that were not. Each counter is read atomically, but the pair is not read
// as a single snapshot.
func (r *Registry) CacheStats() (hits, misses int64) {
	hits = r.cacheHits.Load()
	misses = r.cacheMisses.Load()
	return hits, misses
}
// CacheStatsDetailed returns a snapshot of the cache counters, the current
// cache size and the parse timing counters, together with two derived
// values: the hit rate and the average parse time. Both derived values are
// left at zero when their denominator is zero.
func (r *Registry) CacheStatsDetailed() CacheStatsResult {
	// Read the raw counters first; derived fields are filled in below.
	stats := CacheStatsResult{
		Hits:           r.cacheHits.Load(),
		Misses:         r.cacheMisses.Load(),
		TotalParseTime: r.totalParseTime.Load(),
		ParseCount:     r.parseCount.Load(),
		Size:           r.cache.Len(),
		LastParseTime:  r.lastParseDuration.Load(),
	}

	if lookups := stats.Hits + stats.Misses; lookups > 0 {
		stats.HitRate = float64(stats.Hits) / float64(lookups)
	}
	if stats.ParseCount > 0 {
		// Integer division: the average is truncated to whole nanoseconds.
		stats.AvgParseTime = stats.TotalParseTime / stats.ParseCount
	}

	return stats
}
// ResetStats zeroes every cache and parse counter kept by the registry.
// The counters are cleared one after another, not as a single atomic step.
func (r *Registry) ResetStats() {
	counters := [...]*atomic.Int64{
		&r.cacheHits,
		&r.cacheMisses,
		&r.totalParseTime,
		&r.parseCount,
		&r.lastParseDuration,
	}
	for _, counter := range counters {
		counter.Store(0)
	}
}
// extractErrors finds all error nodes in the tree.
func extractErrors(node *sitter.Node, _ []byte) []SyntaxError {
var errors []SyntaxError
+459
View File
@@ -0,0 +1,459 @@
package parser
import (
"context"
"strings"
"testing"
)
// BenchmarkParse runs Registry.Parse over generated Go sources of increasing
// size, one sub-benchmark per size, all sharing a single registry.
//
// NOTE(review): every iteration of a sub-benchmark parses the same bytes, so
// presumably only the first iteration performs a real parse and later ones
// are served from the registry's content-hash cache — confirm.
func BenchmarkParse(b *testing.B) {
	reg := NewRegistry()
	defer reg.Close()

	bg := context.Background()

	type sizeCase struct {
		name    string
		content string
	}
	cases := []sizeCase{
		{"small_file_100_lines", generateGoCode(100)},
		{"medium_file_1000_lines", generateGoCode(1000)},
		{"large_file_5000_lines", generateGoCode(5000)},
		{"very_large_file_10000_lines", generateGoCode(10000)},
	}

	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			src := []byte(tc.content)

			b.ResetTimer()
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				if _, err := reg.Parse(bg, "test.go", src); err != nil {
					b.Fatalf("Parse failed: %v", err)
				}
			}
		})
	}
}
// BenchmarkParseCacheHit times Parse on content that has already been parsed
// once by the same registry before the timer starts.
func BenchmarkParseCacheHit(b *testing.B) {
	reg := NewRegistry()
	defer reg.Close()

	bg := context.Background()
	src := []byte(generateGoCode(1000))

	// Prime the registry with the exact bytes the timed loop will reuse.
	if _, err := reg.Parse(bg, "test.go", src); err != nil {
		b.Fatalf("initial parse failed: %v", err)
	}

	b.ResetTimer()
	b.ReportAllocs()
	for remaining := b.N; remaining > 0; remaining-- {
		if _, err := reg.Parse(bg, "test.go", src); err != nil {
			b.Fatalf("Parse failed: %v", err)
		}
	}
}
// BenchmarkParseCacheMiss times Parse on content that differs on every
// iteration, so no two iterations present the same bytes to the registry.
//
// Note that the per-iteration source is generated inside the timed loop, so
// the reported time and allocations include that generation work.
func BenchmarkParseCacheMiss(b *testing.B) {
	reg := NewRegistry()
	defer reg.Close()

	bg := context.Background()

	b.ResetTimer()
	b.ReportAllocs()
	for iter := 0; iter < b.N; iter++ {
		// The iteration number becomes a trailing comment, making each
		// source unique.
		unique := generateGoCodeWithSuffix(1000, iter)
		if _, err := reg.Parse(bg, "test.go", []byte(unique)); err != nil {
			b.Fatalf("Parse failed: %v", err)
		}
	}
}
// BenchmarkParseLanguages runs Parse on generated Go, TypeScript, Python and
// JavaScript sources, one sub-benchmark per language. The filename passed to
// Parse carries the extension for each language.
func BenchmarkParseLanguages(b *testing.B) {
	reg := NewRegistry()
	defer reg.Close()

	bg := context.Background()

	type langCase struct {
		name     string
		filename string
		content  string
	}
	cases := []langCase{
		{"go", "test.go", generateGoCode(500)},
		{"typescript", "test.ts", generateTypeScriptCode(500)},
		{"python", "test.py", generatePythonCode(500)},
		{"javascript", "test.js", generateJavaScriptCode(500)},
	}

	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			src := []byte(tc.content)

			b.ResetTimer()
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				if _, err := reg.Parse(bg, tc.filename, src); err != nil {
					b.Fatalf("Parse failed: %v", err)
				}
			}
		})
	}
}
// BenchmarkParseComplexity runs Parse on three differently shaped Go
// sources: empty functions, structs that embed the previous struct, and
// multi-method interfaces.
func BenchmarkParseComplexity(b *testing.B) {
	reg := NewRegistry()
	defer reg.Close()

	bg := context.Background()

	type shapeCase struct {
		name    string
		content string
	}
	cases := []shapeCase{
		{"simple_functions", generateSimpleFunctions(100)},
		{"nested_structures", generateNestedStructures(50)},
		{"complex_types", generateComplexTypes(50)},
	}

	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			src := []byte(tc.content)

			b.ResetTimer()
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				if _, err := reg.Parse(bg, "test.go", src); err != nil {
					b.Fatalf("Parse failed: %v", err)
				}
			}
		})
	}
}
// BenchmarkContentHash times contentHash on inputs of 100, 1000, 10000 and
// 100000 bytes, each consisting of the letter "a" repeated.
func BenchmarkContentHash(b *testing.B) {
	for _, byteCount := range [...]int{100, 1000, 10000, 100000} {
		b.Run(formatSize(byteCount), func(b *testing.B) {
			payload := []byte(strings.Repeat("a", byteCount))

			b.ResetTimer()
			b.ReportAllocs()
			for remaining := b.N; remaining > 0; remaining-- {
				_ = contentHash(payload)
			}
		})
	}
}
// BenchmarkIsBinary benchmarks the binary detection function.
func BenchmarkIsBinary(b *testing.B) {
sizes := []int{100, 1000, 8000, 10000}
for _, size := range sizes {
b.Run(formatSize(size)+"_text", func(b *testing.B) {
content := []byte(strings.Repeat("Hello, World!\n", size/14))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = isBinary(content)
}
})
b.Run(formatSize(size)+"_binary", func(b *testing.B) {
content := make([]byte, size)
for j := 0; j < size; j++ {
content[j] = byte(j % 256)
}
content[size/2] = 0 // Add null byte
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = isBinary(content)
}
})
}
}
// BenchmarkParseWithMaxSize runs Parse on one fixed Go source using
// registries built with different maximum parse sizes. A sub-benchmark is
// skipped when the source is larger than its limit.
func BenchmarkParseWithMaxSize(b *testing.B) {
	bg := context.Background()
	src := []byte(generateGoCode(500))
	srcLen := int64(len(src))

	const (
		kib = 1024
		mib = 1024 * kib
	)
	for _, limit := range []int64{
		10 * kib,  // 10KB
		100 * kib, // 100KB
		1 * mib,   // 1MB
		10 * mib,  // 10MB
	} {
		b.Run(formatBytes(limit), func(b *testing.B) {
			// Parse would reject the source outright, so there is nothing
			// to measure for this limit.
			if srcLen > limit {
				b.Skipf("content size %d exceeds limit %d", len(src), limit)
			}

			reg := NewRegistryWithSize(limit)
			defer reg.Close()

			b.ResetTimer()
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				if _, err := reg.Parse(bg, "test.go", src); err != nil {
					b.Fatalf("Parse failed: %v", err)
				}
			}
		})
	}
}
// BenchmarkConcurrentParse runs Parse on one shared registry from the worker
// goroutines started by b.RunParallel, all parsing the same Go source.
func BenchmarkConcurrentParse(b *testing.B) {
	registry := NewRegistry()
	defer registry.Close()

	ctx := context.Background()
	content := []byte(generateGoCode(500))

	b.ResetTimer()
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if _, err := registry.Parse(ctx, "test.go", content); err != nil {
				// This body runs on RunParallel's worker goroutines, where
				// FailNow-style calls such as Fatalf must not be used.
				// Record the failure and stop this worker instead.
				b.Errorf("Parse failed: %v", err)
				return
			}
		}
	})
}
// Helper functions to generate test code
// generateGoCode returns a Go source file made of a package clause followed
// by lines/10 small functions named Function0, Function1, and so on. The
// lines argument is therefore only an approximate size hint.
func generateGoCode(lines int) string {
	// Everything after the numeric part of the function name is the same
	// for every generated function.
	const tail = "(a, b int) int {\n" +
		"\tif a > b {\n" +
		"\t\treturn a + b\n" +
		"\t}\n" +
		"\treturn a - b\n" +
		"}\n\n"

	var out strings.Builder
	out.WriteString("package main\n\n")
	for fn, total := 0, lines/10; fn < total; fn++ {
		out.WriteString("func Function" + itoa(fn) + tail)
	}
	return out.String()
}
// generateGoCodeWithSuffix returns generateGoCode(lines) followed by a
// one-line comment containing suffix, so different suffixes give different
// sources.
func generateGoCodeWithSuffix(lines int, suffix int) string {
	var out strings.Builder
	out.WriteString(generateGoCode(lines))
	out.WriteString("// Suffix: ")
	out.WriteString(itoa(suffix))
	out.WriteString("\n")
	return out.String()
}
// generateTypeScriptCode returns TypeScript source made of lines/8 small
// typed functions named function0, function1, and so on. The lines argument
// is only an approximate size hint.
func generateTypeScriptCode(lines int) string {
	// Shared remainder of every generated function after its name.
	const tail = "(a: number, b: number): number {\n" +
		" if (a > b) {\n" +
		" return a + b;\n" +
		" }\n" +
		" return a - b;\n" +
		"}\n\n"

	var out strings.Builder
	for fn, total := 0, lines/8; fn < total; fn++ {
		out.WriteString("function function" + itoa(fn) + tail)
	}
	return out.String()
}
// generatePythonCode returns Python source made of lines/6 small functions
// named function0, function1, and so on. The lines argument is only an
// approximate size hint.
//
// Python block structure is defined by indentation, so the function body is
// indented four spaces and the statement under the if eight spaces. With the
// if and its return at the same indent, the generated source would not be
// valid Python.
func generatePythonCode(lines int) string {
	var sb strings.Builder
	for i := 0; i < lines/6; i++ {
		sb.WriteString("def function")
		sb.WriteString(itoa(i))
		sb.WriteString("(a, b):\n")
		sb.WriteString("    if a > b:\n")
		sb.WriteString("        return a + b\n")
		sb.WriteString("    return a - b\n\n")
	}
	return sb.String()
}
// generateJavaScriptCode returns JavaScript source made of lines/8 small
// functions named function0, function1, and so on. The lines argument is
// only an approximate size hint.
func generateJavaScriptCode(lines int) string {
	// Shared remainder of every generated function after its name.
	const tail = "(a, b) {\n" +
		" if (a > b) {\n" +
		" return a + b;\n" +
		" }\n" +
		" return a - b;\n" +
		"}\n\n"

	var out strings.Builder
	for fn, total := 0, lines/8; fn < total; fn++ {
		out.WriteString("function function" + itoa(fn) + tail)
	}
	return out.String()
}
// generateSimpleFunctions returns a Go source file containing count empty
// functions named Func0 through Func<count-1>.
func generateSimpleFunctions(count int) string {
	var out strings.Builder
	out.WriteString("package main\n\n")
	for fn := 0; fn < count; fn++ {
		out.WriteString("func Func" + itoa(fn) + "() { }\n\n")
	}
	return out.String()
}
// generateNestedStructures returns a Go source file declaring depth struct
// types named Struct0 through Struct<depth-1>. Every struct after the first
// has a Nested field whose type is the previously declared struct.
func generateNestedStructures(depth int) string {
	var out strings.Builder
	out.WriteString("package main\n\n")
	for level := 0; level < depth; level++ {
		decl := "type Struct" + itoa(level) + " struct {\n" +
			"\tField1 int\n" +
			"\tField2 string\n"
		if level > 0 {
			// Link back to the struct declared on the previous iteration.
			decl += "\tNested Struct" + itoa(level-1) + "\n"
		}
		out.WriteString(decl + "}\n\n")
	}
	return out.String()
}
// generateComplexTypes returns a Go source file declaring count interface
// types named Type0 through Type<count-1>, each with the same three methods.
func generateComplexTypes(count int) string {
	// Method set shared by every generated interface.
	const methods = " interface {\n" +
		"\tMethod1() error\n" +
		"\tMethod2(a int, b string) (int, error)\n" +
		"\tMethod3() chan interface{}\n" +
		"}\n\n"

	var out strings.Builder
	out.WriteString("package main\n\n")
	for idx := 0; idx < count; idx++ {
		out.WriteString("type Type" + itoa(idx) + methods)
	}
	return out.String()
}
// formatSize renders size as a short benchmark label: "<size>B" below 1000,
// otherwise "<size/1000>KB" using decimal thousands and integer division.
func formatSize(size int) string {
	value, unit := size/1000, "KB"
	if size < 1000 {
		value, unit = size, "B"
	}
	return itoa(value) + unit
}
// formatBytes renders a byte count as a short label using 1024-based units
// and integer division: "B" below 1024, "KB" below 1024*1024, else "MB".
func formatBytes(bytes int64) string {
	const (
		kib = 1024
		mib = 1024 * kib
	)
	switch {
	case bytes < kib:
		return itoa(int(bytes)) + "B"
	case bytes < mib:
		return itoa(int(bytes/kib)) + "KB"
	default:
		return itoa(int(bytes/mib)) + "MB"
	}
}
// itoa converts n to its base-10 string form without importing strconv.
//
// The magnitude is computed as a uint64 so that the most negative int, whose
// negation overflows int, is still rendered with all of its digits.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}

	negative := n < 0
	magnitude := uint64(n)
	if negative {
		// Negating in the unsigned domain is well defined for every value,
		// including the minimum int where -n on int would wrap back to n.
		magnitude = -magnitude
	}

	// 20 bytes fit the 19 digits of the largest 64-bit magnitude plus a sign.
	var buf [20]byte
	i := len(buf)
	for magnitude > 0 {
		i--
		buf[i] = byte('0' + magnitude%10)
		magnitude /= 10
	}
	if negative {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}