fixup! Update, bugfixes on diff and edit handling

This commit is contained in:
2026-02-22 14:03:54 +00:00
parent 6980d3b294
commit 982c2c8b44
23 changed files with 655 additions and 194 deletions
+8 -6
View File
@@ -4,6 +4,8 @@ import (
"context"
"fmt"
"testing"
"github.com/cespare/xxhash/v2"
)
// TestLRUCacheEviction tests that the LRU cache properly evicts old entries.
@@ -82,8 +84,8 @@ func TestContentHashCollisionResistance(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
hash1 := contentHash(tc.content1)
hash2 := contentHash(tc.content2)
hash1 := fmt.Sprintf("%016x", xxhash.Sum64(tc.content1))
hash2 := fmt.Sprintf("%016x", xxhash.Sum64(tc.content2))
if hash1 == hash2 {
t.Errorf("Hash collision: %s == %s for different content", hash1, hash2)
@@ -96,9 +98,9 @@ func TestContentHashCollisionResistance(t *testing.T) {
func TestContentHashConsistency(t *testing.T) {
content := []byte("package main\n\nfunc test() {}\n")
hash1 := contentHash(content)
hash2 := contentHash(content)
hash3 := contentHash(content)
hash1 := fmt.Sprintf("%016x", xxhash.Sum64(content))
hash2 := fmt.Sprintf("%016x", xxhash.Sum64(content))
hash3 := fmt.Sprintf("%016x", xxhash.Sum64(content))
if hash1 != hash2 || hash2 != hash3 {
t.Errorf("Hash inconsistency: %s, %s, %s", hash1, hash2, hash3)
@@ -115,7 +117,7 @@ func BenchmarkContentHash_xxHash(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = contentHash(content)
_ = fmt.Sprintf("%016x", xxhash.Sum64(content))
}
}
+3 -67
View File
@@ -24,9 +24,8 @@ import (
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
)
// MaxFileSize is the default maximum file size we'll parse (10 MiB, i.e. 10 * 1024 * 1024 bytes).
//
// Deprecated: Use Registry.maxParseSize instead.
const MaxFileSize = 10 * 1024 * 1024
// maxFileSize is the default upper bound, in bytes, on content we will parse:
// 10 MiB (10 * 1024 * 1024).
const maxFileSize = 10 << 20
// Registry manages Tree-sitter parsers for different languages.
type Registry struct {
@@ -69,18 +68,6 @@ type SyntaxError struct {
Location protocol.Location
}
// CacheStatsResult contains cache statistics.
// It is the snapshot returned by Registry.CacheStatsDetailed; every field is
// serialised to JSON under the tag shown.
type CacheStatsResult struct {
// Hits is the value of the registry's cache-hit counter at snapshot time.
Hits int64 `json:"hits"`
// Misses is the value of the registry's cache-miss counter at snapshot time.
Misses int64 `json:"misses"`
// HitRate is Hits / (Hits + Misses); it stays 0 when both counters are 0.
HitRate float64 `json:"hit_rate"`
// Size is the number of entries reported by the cache's Len method.
Size int `json:"size"`
// TotalParseTime is the accumulated parse time (nanoseconds, per the JSON tag).
TotalParseTime int64 `json:"total_parse_time_ns"`
// ParseCount is the value of the registry's parse counter at snapshot time.
ParseCount int64 `json:"parse_count"`
// AvgParseTime is TotalParseTime / ParseCount using integer division; it
// stays 0 when ParseCount is 0.
AvgParseTime int64 `json:"avg_parse_time_ns"`
// LastParseTime is the most recently recorded parse duration (nanoseconds,
// per the JSON tag).
LastParseTime int64 `json:"last_parse_time_ns"`
}
// NewRegistry creates a new parser registry with the default max parse size.
// For custom max parse size, use NewRegistryWithSize.
func NewRegistry() *Registry {
@@ -98,7 +85,7 @@ func NewRegistryWithSize(maxParseSize int64) *Registry {
}
if maxParseSize <= 0 {
maxParseSize = MaxFileSize
maxParseSize = maxFileSize
}
return &Registry{
@@ -266,50 +253,6 @@ func (r *Registry) Parse(ctx context.Context, filename string, content []byte) (
}, nil
}
// CacheStats reports the current values of the registry's cache-hit and
// cache-miss counters, in that order.
func (r *Registry) CacheStats() (hits, misses int64) {
	hits = r.cacheHits.Load()
	misses = r.cacheMisses.Load()
	return hits, misses
}
// CacheStatsDetailed takes a snapshot of the registry's cache and parse
// counters and derives two ratios from it:
//
//   - HitRate is hits / (hits + misses), left at 0 when no lookups were counted.
//   - AvgParseTime is totalParseTime / parseCount (integer division), left at 0
//     when no parses were counted.
//
// Each counter is read with its own Load call, so the snapshot is not taken
// under a single lock.
func (r *Registry) CacheStatsDetailed() CacheStatsResult {
	stats := CacheStatsResult{
		Hits:           r.cacheHits.Load(),
		Misses:         r.cacheMisses.Load(),
		TotalParseTime: r.totalParseTime.Load(),
		ParseCount:     r.parseCount.Load(),
	}

	// Guard both divisions against a zero denominator.
	if lookups := stats.Hits + stats.Misses; lookups > 0 {
		stats.HitRate = float64(stats.Hits) / float64(lookups)
	}
	if stats.ParseCount > 0 {
		stats.AvgParseTime = stats.TotalParseTime / stats.ParseCount
	}

	stats.Size = r.cache.Len()
	stats.LastParseTime = r.lastParseDuration.Load()
	return stats
}
// ResetStats resets all cache and parse statistics.
// It stores zero into each of the five counters that CacheStatsDetailed reads
// (cacheHits, cacheMisses, totalParseTime, parseCount, lastParseDuration).
// The cache itself is not touched here, so cached entries remain in place.
// NOTE(review): the counters are cleared one at a time, so a concurrent reader
// could presumably observe a partially reset set — confirm callers tolerate this.
func (r *Registry) ResetStats() {
r.cacheHits.Store(0)
r.cacheMisses.Store(0)
r.totalParseTime.Store(0)
r.parseCount.Store(0)
r.lastParseDuration.Store(0)
}
// extractErrors finds all error nodes in the tree.
func extractErrors(node *sitter.Node, _ []byte) []SyntaxError {
var errors []SyntaxError
@@ -346,13 +289,6 @@ func extractErrors(node *sitter.Node, _ []byte) []SyntaxError {
return errors
}
// contentHash computes the hash of content used for caching: the 64-bit
// xxHash digest rendered as 16 zero-padded lowercase hexadecimal characters.
// xxHash is a non-cryptographic hash, so the result must not be relied on for
// security purposes.
func contentHash(content []byte) string {
	return fmt.Sprintf("%016x", xxhash.Sum64(content))
}
// isBinary checks if content appears to be binary.
func isBinary(content []byte) bool {
// Check first 8000 bytes for null bytes
+4 -1
View File
@@ -2,8 +2,11 @@ package parser
import (
"context"
"fmt"
"strings"
"testing"
"github.com/cespare/xxhash/v2"
)
// BenchmarkParse benchmarks parsing files of various sizes.
@@ -194,7 +197,7 @@ func BenchmarkContentHash(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = contentHash(content)
_ = fmt.Sprintf("%016x", xxhash.Sum64(content))
}
})
}
+4 -4
View File
@@ -31,8 +31,8 @@ type JSONNode struct {
// ParseYAML parses YAML content and returns a tree-sitter-compatible result
func (r *Registry) ParseYAML(ctx context.Context, filename string, content []byte) (*ParseResult, error) {
// Check file size
if len(content) > MaxFileSize {
return nil, errors.NewFileTooLarge(filename, int64(len(content)), MaxFileSize)
if len(content) > maxFileSize {
return nil, errors.NewFileTooLarge(filename, int64(len(content)), maxFileSize)
}
// Parse YAML
@@ -57,8 +57,8 @@ func (r *Registry) ParseYAML(ctx context.Context, filename string, content []byt
// ParseJSON parses JSON content and returns a tree-sitter-compatible result
func (r *Registry) ParseJSON(ctx context.Context, filename string, content []byte) (*ParseResult, error) {
// Check file size
if len(content) > MaxFileSize {
return nil, errors.NewFileTooLarge(filename, int64(len(content)), MaxFileSize)
if len(content) > maxFileSize {
return nil, errors.NewFileTooLarge(filename, int64(len(content)), maxFileSize)
}
// Parse JSON to validate syntax