Files
filepuff-mcp/internal/server/handlers_file.go
T
lukaszraczylo 5ad975ee7a V2/token optimization (#11)
* v2.0: token-optimization overhaul

Additive (backward-compatible flags):
- file_read: skeleton mode, strip (imports/license/block_comments),
  compact_line_numbers, 8-char etag with prefix-match compat
- ast_query: format=verbose|compact|location, pagination cursor
- file_search: cluster mode, pagination cursor
- lsp_query (references): compact output

Breaking (v2):
- Preambles removed; opt-in verbose=true restores
- edit_apply: response=count|diff|none, default count
- ping tool removed
- symbol_at/find_definition/find_references merged into lsp_query
- Tool descriptions trimmed -83%, help moved to filepuff://help/<tool>
- Batch file_read dedups by etag

Protocol:
- ResourceLink returned for file_read >64 KiB (force_inline override)
- OnAfterInitialize hook reads capabilities.experimental.filepuff
  for session defaults (default_format, default_max_results,
  default_cluster, compact_refs, line_numbers,
  resource_link_threshold)

* fix: drop --max-total-count from ripgrep args

The flag does not exist in stable ripgrep (confirmed up to 15.1.0 --
"unrecognized flag --max-total-count, similar flags that are
available: --max-count"). Every file_search call failed on hosts with
stock rg. --max-count is per-file, not a drop-in replacement, so rely
on the in-process truncation in parseOutput that was already the
documented safety net.
2026-04-19 19:56:49 +01:00

595 lines
19 KiB
Go

// Package server implements the MCP server for file operations.
package server
import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"
"time"
xxhash "github.com/cespare/xxhash/v2"
"github.com/lukaszraczylo/mcp-filepuff/internal/cursor"
"github.com/lukaszraczylo/mcp-filepuff/internal/parser"
"github.com/lukaszraczylo/mcp-filepuff/internal/search"
"github.com/lukaszraczylo/mcp-filepuff/pkg/errors"
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
"github.com/mark3labs/mcp-go/mcp"
)
// handleFileSearch serves the file_search tool: it runs a ripgrep-backed
// search for the requested pattern and returns the formatted matches.
//
// max_results and cluster fall back to the session preferences when the call
// does not set them explicitly. Pagination is cursor based: a cursor carries an
// offset plus a hash of the query parameters, and is rejected when that hash
// does not match the current call. Tool-level failures are reported through an
// error result; the returned Go error is always nil.
func (s *Server) handleFileSearch(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	began := time.Now()
	defer func() {
		s.logger.Debug("file_search completed",
			"duration_ms", time.Since(began).Milliseconds(),
		)
	}()

	if s.searcher == nil {
		return mcp.NewToolResultError("ripgrep (rg) is not available. Please install it: https://github.com/BurntSushi/ripgrep#installation"), nil
	}

	pattern, err := request.RequireString("pattern")
	if err != nil {
		return mcp.NewToolResultError("pattern is required"), nil
	}

	var (
		paths        = request.GetStringSlice("paths", nil)
		fileTypes    = request.GetStringSlice("file_types", nil)
		ignoreCase   = request.GetBool("ignore_case", false)
		regex        = request.GetBool("regex", true)
		contextLines = request.GetInt("context_lines", 2)
	)

	// Session preferences only apply to options the call left unset.
	var (
		sessionMax     int
		sessionCluster *bool
	)
	if prefs := s.sessionPrefs.Load(); prefs != nil {
		sessionMax = prefs.DefaultMaxResults
		sessionCluster = prefs.DefaultCluster
	}
	maxResults := effectiveInt(request, "max_results", sessionMax, 0)
	cluster := effectiveBool(request, "cluster", sessionCluster, false)

	// The hash ties a cursor to the query that produced it.
	queryHash := cursor.HashParams(map[string]string{
		"pattern":       pattern,
		"paths":         strings.Join(paths, ","),
		"file_types":    strings.Join(fileTypes, ","),
		"ignore_case":   strconv.FormatBool(ignoreCase),
		"regex":         strconv.FormatBool(regex),
		"context_lines": strconv.Itoa(contextLines),
	})

	// Decode the cursor, if any, into the number of results to skip.
	offset := 0
	if token := request.GetString("cursor", ""); token != "" {
		decoded, tokenHash, decErr := cursor.Decode(token)
		switch {
		case decErr != nil:
			return mcp.NewToolResultError(fmt.Sprintf("invalid cursor: %s", decErr)), nil
		case tokenHash != queryHash:
			return mcp.NewToolResultError("cursor is for a different query, re-run without cursor"), nil
		}
		offset = decoded
	}

	// A follow-up page needs the complete result list so the offset can be
	// applied in-process; a first page lets the searcher cap at maxResults.
	searchLimit := maxResults
	if offset > 0 {
		searchLimit = 0
	}

	results, err := s.searcher.Search(ctx, &search.Request{
		Pattern:      pattern,
		Paths:        paths,
		FileTypes:    fileTypes,
		IgnoreCase:   ignoreCase,
		Regex:        regex,
		ContextLines: contextLines,
		MaxResults:   searchLimit,
	})
	if err != nil {
		s.logger.Warn("search error", "error", err)
		return mcp.NewToolResultError(fmt.Sprintf("search error: %s", errors.SanitizeError(err))), nil
	}

	// Skip what earlier pages already returned. Truncated is re-evaluated
	// by the cap below.
	if offset > 0 {
		if offset < len(results.Results) {
			results.Results = results.Results[offset:]
		} else {
			results.Results = nil
		}
		results.Truncated = false
	}

	// Cap the page and, when results are left over, emit a cursor footer.
	var cursorLine string
	if maxResults > 0 && len(results.Results) > maxResults {
		leftover := len(results.Results) - maxResults
		results.Results = results.Results[:maxResults]
		results.Truncated = true
		cursorLine = fmt.Sprintf("[cursor: %s, remaining: %d]", cursor.Encode(offset+maxResults, queryHash), leftover)
	}

	s.logger.Info("search completed",
		"pattern", pattern,
		"results_count", len(results.Results),
		"truncated", results.Truncated,
	)

	formatted := s.searcher.FormatResultsWithOptions(results, search.FormatOptions{
		Cluster:    cluster,
		CursorLine: cursorLine,
		Verbose:    request.GetBool("verbose", false),
	})
	return mcp.NewToolResultText(formatted), nil
}
// effectiveInt resolves an integer option with three levels of precedence:
// the per-call argument when key is present in the request (read with builtIn
// as its fallback), then sessionDefault when it is positive, then builtIn.
func effectiveInt(request mcp.CallToolRequest, key string, sessionDefault, builtIn int) int {
	_, explicit := request.GetArguments()[key]
	switch {
	case explicit:
		return request.GetInt(key, builtIn)
	case sessionDefault > 0:
		return sessionDefault
	default:
		return builtIn
	}
}
// effectiveBool resolves a boolean option with three levels of precedence:
// the per-call argument when key is present in the request (read with builtIn
// as its fallback), then *sessionDefault when the pointer is non-nil, then
// builtIn.
func effectiveBool(request mcp.CallToolRequest, key string, sessionDefault *bool, builtIn bool) bool {
	_, explicit := request.GetArguments()[key]
	switch {
	case explicit:
		return request.GetBool(key, builtIn)
	case sessionDefault != nil:
		return *sessionDefault
	default:
		return builtIn
	}
}
// handleFileRead serves the file_read tool.
//
// The call first takes a slot on s.readSem, giving up with an error result if
// ctx is cancelled while waiting. A non-empty "paths" argument selects batch
// mode and takes precedence over "path": every file is rendered inline under a
// "--- <path> ---" header, and a file whose etag was already seen in the batch
// is replaced by a pointer to the first path that produced it. In single-file
// mode an oversized result is returned as a resource link instead of inline
// text. Tool-level failures are reported through an error result; the returned
// Go error is always nil.
func (s *Server) handleFileRead(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	select {
	case <-ctx.Done():
		return mcp.NewToolResultError("request cancelled"), nil
	case s.readSem <- struct{}{}:
		defer func() { <-s.readSem }()
	}

	// Batch mode. Batch reads are always inlined: combining dedup with
	// resource links is too complex for unclear savings.
	if batch := request.GetStringSlice("paths", nil); len(batch) > 0 {
		var out strings.Builder
		firstByEtag := make(map[string]string) // etag -> first path that produced it
		for idx, filePath := range batch {
			if idx > 0 {
				out.WriteString("\n")
			}
			body, readErr := s.readOneFile(ctx, request, filePath)
			if readErr != nil {
				fmt.Fprintf(&out, "--- %s ---\n[error: %s]\n", filePath, errors.SanitizeError(readErr))
				continue
			}
			// Results without an etag footer are never deduplicated.
			tag := extractEtag(body)
			if first, dup := firstByEtag[tag]; dup && tag != "" {
				fmt.Fprintf(&out, "--- %s ---\n[duplicate of %s, etag: %s]\n", filePath, first, tag)
				continue
			}
			if tag != "" {
				firstByEtag[tag] = filePath
			}
			fmt.Fprintf(&out, "--- %s ---\n%s", filePath, body)
		}
		return mcp.NewToolResultText(out.String()), nil
	}

	single := request.GetString("path", "")
	if single == "" {
		return mcp.NewToolResultError("path or paths is required"), nil
	}
	body, err := s.readOneFile(ctx, request, single)
	if err != nil {
		return mcp.NewToolResultError(errors.SanitizeError(err)), nil
	}

	// Resource-link threshold, lowest to highest precedence: server config,
	// positive session preference, positive per-call max_inline_bytes. The
	// threshold is compared against the rendered result, not the file size.
	limit := s.cfg.ResourceLinkThresholdBytes
	if prefs := s.sessionPrefs.Load(); prefs != nil && prefs.ResourceLinkThreshold > 0 {
		limit = prefs.ResourceLinkThreshold
	}
	forceInline := request.GetBool("force_inline", false)
	if perCall := request.GetInt("max_inline_bytes", 0); perCall > 0 {
		limit = perCall
	}

	// Inline when forced, when the threshold is disabled (<= 0), or when the
	// result fits.
	if forceInline || limit <= 0 || len(body) <= limit {
		return mcp.NewToolResultText(body), nil
	}

	tag := extractEtag(body)
	link := mcp.NewResourceLink(
		buildReadResourceURI(single, tag),
		single,
		fmt.Sprintf("etag=%s, size=%d bytes, lines=%d", tag, len(body), strings.Count(body, "\n")),
		detectMIMEType(single),
	)
	return &mcp.CallToolResult{
		Content: []mcp.Content{link},
	}, nil
}
// buildReadResourceURI returns the filepuff://read URI for path. A non-empty
// etag is appended as an "etag" query parameter. Neither value is escaped.
func buildReadResourceURI(path, etag string) string {
	uri := "filepuff://read/" + path
	if etag != "" {
		uri += "?etag=" + etag
	}
	return uri
}
// detectMIMEType guesses a MIME type from the file-name suffix of path. The
// comparison is case-insensitive, and anything unrecognised is reported as
// "text/plain".
func detectMIMEType(path string) string {
	lowered := strings.ToLower(path)
	known := []struct {
		mime     string
		suffixes []string
	}{
		{"text/x-go", []string{".go"}},
		{"text/typescript", []string{".ts", ".tsx"}},
		{"text/javascript", []string{".js", ".jsx"}},
		{"text/x-python", []string{".py"}},
		{"text/x-rust", []string{".rs"}},
		{"text/markdown", []string{".md"}},
		{"application/json", []string{".json"}},
		{"text/yaml", []string{".yaml", ".yml"}},
		{"text/toml", []string{".toml"}},
		{"text/html", []string{".html", ".htm"}},
		{"text/css", []string{".css"}},
		{"text/x-sh", []string{".sh"}},
		{"text/x-c", []string{".c", ".h"}},
		{"text/x-c++", []string{".cpp", ".cc", ".cxx"}},
	}
	for _, entry := range known {
		for _, suffix := range entry.suffixes {
			if strings.HasSuffix(lowered, suffix) {
				return entry.mime
			}
		}
	}
	return "text/plain"
}
// lineNumberOpts holds resolved line-numbering preferences for readOneFile.
// It is produced by resolveLineNumberOpts and its fields are passed on to
// writeLines.
type lineNumberOpts struct {
// noLineNumbers makes writeLines emit each line without any gutter.
noLineNumbers bool
// compactLineNums selects the unpadded "12│content" gutter form.
compactLineNums bool
// lineInterval is read from the line_number_interval argument (default 1).
// writeLines numbers every lineInterval-th line plus the first and last line
// of the range; a value of 0 makes resolveLineNumberOpts set noLineNumbers.
lineInterval int
}
// resolveLineNumberOpts merges the per-call line-number arguments with the
// session's line_numbers preference.
//
// The session preference ("none", "compact" or "full") is applied only when
// the call sets none of no_line_numbers, compact_line_numbers and
// line_number_interval. An interval of 0 always turns line numbers off.
func (s *Server) resolveLineNumberOpts(request mcp.CallToolRequest) lineNumberOpts {
	resolved := lineNumberOpts{
		noLineNumbers:   request.GetBool("no_line_numbers", false),
		lineInterval:    request.GetInt("line_number_interval", 1),
		compactLineNums: request.GetBool("compact_line_numbers", false),
	}

	if prefs := s.sessionPrefs.Load(); prefs != nil && prefs.LineNumbers != "" {
		// Any explicit per-call line-number argument disables the session pref.
		args := request.GetArguments()
		explicit := false
		for _, key := range [...]string{"no_line_numbers", "compact_line_numbers", "line_number_interval"} {
			if _, present := args[key]; present {
				explicit = true
				break
			}
		}
		if !explicit {
			switch prefs.LineNumbers {
			case "none":
				resolved.noLineNumbers, resolved.compactLineNums = true, false
			case "compact":
				resolved.noLineNumbers, resolved.compactLineNums = false, true
			case "full":
				resolved.noLineNumbers, resolved.compactLineNums = false, false
			}
		}
	}

	if resolved.lineInterval == 0 {
		resolved.noLineNumbers = true
	}
	return resolved
}
// applyStrip runs the requested strip flags over lines[lineStart-1:lineEnd].
//
// With no flags the inputs are returned untouched and footer is empty.
// Otherwise the selected range is joined, passed to parser.StripContent with
// the language detected from path, and split again: the returned slice holds
// only the stripped text, so the returned bounds are always 1 and
// len(newLines). footer is a "[stripped: ...]" line naming the flags that
// StripContent reports in its Stripped field, or empty when that list is empty.
func applyStrip(lines []string, lineStart, lineEnd int, stripFlags []parser.StripFlag, path string) (newLines []string, newStart, newEnd int, footer string) {
	if len(stripFlags) == 0 {
		return lines, lineStart, lineEnd, ""
	}

	window := lines[lineStart-1 : lineEnd]
	result := parser.StripContent(strings.Join(window, "\n"), stripFlags, protocol.DetectLanguage(path))

	if applied := result.Stripped; len(applied) > 0 {
		labels := make([]string, 0, len(applied))
		for _, flag := range applied {
			labels = append(labels, string(flag))
		}
		footer = "[stripped: " + strings.Join(labels, ", ") + "]\n"
	}

	newLines = splitLines(result.Content)
	return newLines, 1, len(newLines), footer
}
// loadFileForRead returns the bytes of path after checking, in order, that the
// path is allowed by the server configuration, that it can be stat'ed, and
// that its size does not exceed s.cfg.MaxFileSize.
//
// Not-found and permission failures yield errors that name the path. Any other
// stat or read failure is logged and reported with a generic message that does
// not include the underlying error.
func (s *Server) loadFileForRead(path string) ([]byte, error) {
	if !s.cfg.IsPathAllowed(path) {
		return nil, fmt.Errorf("path is outside workspace root")
	}

	info, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		// Stat succeeded; continue with the size check below.
	case os.IsNotExist(statErr):
		return nil, fmt.Errorf("file not found: %s", path)
	case os.IsPermission(statErr):
		return nil, fmt.Errorf("permission denied: %s", path)
	default:
		s.logger.Warn("file stat error", "path", path, "error", statErr)
		return nil, fmt.Errorf("error accessing file")
	}

	if size := info.Size(); size > s.cfg.MaxFileSize {
		return nil, fmt.Errorf("file too large (%d bytes, max %d)", size, s.cfg.MaxFileSize)
	}

	data, readErr := os.ReadFile(path)
	switch {
	case readErr == nil:
		return data, nil
	case os.IsPermission(readErr):
		return nil, fmt.Errorf("permission denied: %s", path)
	default:
		s.logger.Warn("file read error", "path", path, "error", readErr)
		return nil, fmt.Errorf("error reading file")
	}
}
// readOneFile loads path and renders it according to the options carried by
// request, returning the text file_read sends back for that file.
//
// Every successful result ends with an "[etag: <8 hex chars>]" footer, except
// the short-circuit "[unchanged, etag: ...]" reply produced when previous_etag
// matches the current content. Rendering proceeds as follows:
//
//   - mode=skeleton returns the parser's skeleton of the whole file (the line
//     range is ignored); on a skeleton error it falls back to a full read.
//   - include_ast prepends an AST summary; symbols_only (or mode=symbols_only)
//     returns the summary alone.
//   - otherwise the selected line range is stripped (strip flags), then written
//     with the resolved line-number options.
//
// The line range comes from line_start/line_end, or from the bounds of
// symbol_name when given, and is clamped to [1, len(lines)].
//
// Errors come from loadFileForRead, from symbols_only without include_ast, and
// from a symbol_name that cannot be resolved.
func (s *Server) readOneFile(ctx context.Context, request mcp.CallToolRequest, path string) (string, error) {
	content, err := s.loadFileForRead(path)
	if err != nil {
		return "", err
	}

	// Short etag: the first 8 hex chars (32 bits) of the 64-bit xxhash.
	// previous_etag is accepted by prefix match so 16-char etags issued by
	// older versions keep working.
	fullHash := fmt.Sprintf("%016x", xxhash.Sum64(content))
	etag := fullHash[:8]
	if prev := request.GetString("previous_etag", ""); prev != "" {
		// Match when prev equals the short etag, is a prefix of the full hash
		// (including the full 16 chars), or starts with the short etag.
		if prev == etag || strings.HasPrefix(fullHash, prev) || strings.HasPrefix(prev, etag) {
			return fmt.Sprintf("[unchanged, etag: %s]\n", etag), nil
		}
	}

	// Parse request options.
	includeAST := request.GetBool("include_ast", false)
	symbolsOnly := request.GetBool("symbols_only", false)
	symbolName := request.GetString("symbol_name", "")
	collapseBlank := request.GetBool("collapse_blank_lines", false)
	maxLines := request.GetInt("max_lines", 0)
	lnOpts := s.resolveLineNumberOpts(request)

	// mode: "full" (default) | "skeleton" | "symbols_only".
	// symbols_only mode is an alias for include_ast + symbols_only.
	mode := request.GetString("mode", "full")
	if mode == "symbols_only" {
		symbolsOnly = true
		includeAST = true
	}

	// strip: content classes to remove before line numbering.
	stripRaw := request.GetStringSlice("strip", nil)
	var stripFlags []parser.StripFlag
	for _, sf := range stripRaw {
		stripFlags = append(stripFlags, parser.StripFlag(sf))
	}

	if symbolsOnly && !includeAST {
		return "", fmt.Errorf("symbols_only requires include_ast=true")
	}

	lines := splitLines(string(content))
	lineStart := request.GetInt("line_start", 1)
	lineEnd := request.GetInt("line_end", len(lines))

	// Symbol-based line range: find the symbol and use its exact bounds.
	if symbolName != "" {
		symbolKind := protocol.SymbolKind(request.GetString("symbol_kind", ""))
		start, end, found := s.resolveSymbolLines(ctx, path, content, symbolName, symbolKind)
		if !found {
			return "", fmt.Errorf("symbol %q not found in %s", symbolName, path)
		}
		lineStart = start
		lineEnd = end
	}

	// Clamp to the valid range [1, len(lines)].
	if lineStart < 1 {
		lineStart = 1
	}
	if lineEnd > len(lines) {
		lineEnd = len(lines)
	}
	// A line_end below 1 must be raised before lineStart is pulled down to it;
	// otherwise lineStart ends up <= 0 and the slicing/indexing done by
	// applyStrip and writeLines panics on a negative index.
	if lineEnd < 1 {
		lineEnd = 1
	}
	if lineStart > lineEnd {
		lineStart = lineEnd
	}

	var output strings.Builder

	// Skeleton mode: the parser replaces function bodies with { ... }.
	if mode == "skeleton" {
		skText, _, skErr := parser.SkeletonFile(ctx, s.parser, path, content)
		if skErr != nil {
			// Fall back to full mode on parse error.
			s.logger.Warn("skeleton mode failed, falling back to full", "path", path, "error", skErr)
		} else {
			output.WriteString(skText)
			fmt.Fprintf(&output, "[etag: %s]\n", etag)
			return output.String(), nil
		}
	}

	if includeAST {
		if summary := s.generateASTSummary(ctx, path, content); summary != "" {
			output.WriteString(summary)
			if !symbolsOnly {
				output.WriteString("\n---\n\n")
			}
		}
	}
	if symbolsOnly {
		fmt.Fprintf(&output, "[etag: %s]\n", etag)
		return output.String(), nil
	}

	// Strip AFTER line-range selection and BEFORE line numbering.
	lines, lineStart, lineEnd, strippedFooter := applyStrip(lines, lineStart, lineEnd, stripFlags, path)
	writeLines(&output, lines, lineStart, lineEnd, maxLines, lnOpts.noLineNumbers, lnOpts.lineInterval, collapseBlank, lnOpts.compactLineNums)
	if strippedFooter != "" {
		output.WriteString(strippedFooter)
	}
	fmt.Fprintf(&output, "[etag: %s]\n", etag)
	return output.String(), nil
}
// resolveSymbolLines parses content and looks up the line range of symbolName
// through parser.FindSymbolRange. An empty symbolKind places no restriction on
// the kind of symbol. A parse failure is reported the same way as a missing
// symbol: found is false and both line numbers are zero.
func (s *Server) resolveSymbolLines(ctx context.Context, path string, content []byte, symbolName string, symbolKind protocol.SymbolKind) (startLine, endLine int, found bool) {
	parsed, parseErr := s.parser.Parse(ctx, path, content)
	if parseErr != nil {
		return 0, 0, false
	}
	startLine, endLine, found = parser.FindSymbolRange(parsed.Tree, content, path, symbolName, symbolKind)
	return startLine, endLine, found
}
// writeLines appends lines[lineStart-1:lineEnd] (1-based, inclusive bounds) to
// output, one line per row, applying the formatting options.
//
// The bounds are clamped to [1, len(lines)], so out-of-range values never
// index outside the slice; a range that is empty after clamping writes nothing.
//
//   - maxLines > 0 limits the rows considered; when rows are cut off, a
//     "[... N more lines omitted ...]" notice is appended.
//   - collapseBlank drops a blank line that directly follows another blank line.
//   - noLineNumbers writes the bare line text.
//   - compactLineNums writes "12│content" (no padding, no trailing space);
//     the default gutter is "  12│ content" (number right-aligned in 4 columns).
//   - lineInterval > 1 prints the number only on every lineInterval-th line and
//     on the first and last row of the range; other rows keep an empty gutter
//     of the same width.
func writeLines(output *strings.Builder, lines []string, lineStart, lineEnd, maxLines int, noLineNumbers bool, lineInterval int, collapseBlank bool, compactLineNums bool) {
	// Clamp defensively: a lineStart below 1 would otherwise index lines[-1],
	// and a lineEnd past the slice would inflate the omitted-lines count.
	if lineStart < 1 {
		lineStart = 1
	}
	if lineEnd > len(lines) {
		lineEnd = len(lines)
	}

	effectiveEnd := lineEnd
	truncatedCount := 0
	if maxLines > 0 && (lineEnd-lineStart+1) > maxLines {
		effectiveEnd = lineStart + maxLines - 1
		truncatedCount = lineEnd - effectiveEnd
	}

	prevBlank := false
	for i := lineStart - 1; i < effectiveEnd; i++ {
		line := lines[i]
		isBlank := strings.TrimSpace(line) == ""
		if collapseBlank && isBlank && prevBlank {
			continue
		}
		prevBlank = isBlank

		lineNum := i + 1
		// lineInterval <= 1 is tested first so the modulo never sees a zero
		// divisor.
		numbered := lineInterval <= 1 || lineNum%lineInterval == 0 || i == lineStart-1 || i == effectiveEnd-1

		switch {
		case noLineNumbers:
			output.WriteString(line + "\n")
		case compactLineNums && numbered:
			fmt.Fprintf(output, "%d│%s\n", lineNum, line)
		case compactLineNums:
			fmt.Fprintf(output, "│%s\n", line)
		case numbered:
			fmt.Fprintf(output, "%4d│ %s\n", lineNum, line)
		default:
			// Four spaces keep the separator in the same column as "%4d│ ".
			fmt.Fprintf(output, "    │ %s\n", line)
		}
	}

	if truncatedCount > 0 {
		fmt.Fprintf(output, "\n[... %d more lines omitted. Use line_start/line_end or increase max_lines to see more]\n", truncatedCount)
	}
}
// extractEtag pulls the etag out of the last "[etag: ...]" marker in a
// readOneFile result. It returns "" when the marker is absent or is not
// terminated by "]".
func extractEtag(result string) string {
	const marker = "[etag: "
	start := strings.LastIndex(result, marker)
	if start == -1 {
		return ""
	}
	tail := result[start+len(marker):]
	if end := strings.IndexByte(tail, ']'); end != -1 {
		return tail[:end]
	}
	return ""
}
// splitLines splits s on "\n" and returns the pieces without the newline.
//
// The result is the same as strings.Split(s, "\n") for every input: a trailing
// newline yields a final empty element, and a "\r" before the newline is kept
// as part of the line. Inputs up to 1MB are split with strings.Split; larger
// inputs are read line by line through a bufio.Reader.
func splitLines(s string) []string {
	const largeSizeThreshold = 1024 * 1024 // 1MB
	if len(s) <= largeSizeThreshold {
		return strings.Split(s, "\n")
	}

	// ReadString is used instead of bufio.Scanner because ScanLines drops a
	// trailing "\r" and caps the line length, which made CRLF files split
	// differently depending on their size.
	reader := bufio.NewReader(strings.NewReader(s))
	lines := make([]string, 0, strings.Count(s, "\n")+1)
	for {
		line, err := reader.ReadString('\n')
		if err != nil {
			// The source is an in-memory string, so this is io.EOF: line holds
			// whatever follows the last newline (possibly nothing).
			return append(lines, line)
		}
		// err == nil guarantees line ends with the delimiter.
		lines = append(lines, line[:len(line)-1])
	}
}