mirror of
https://github.com/lukaszraczylo/filepuff-mcp.git
synced 2026-06-05 22:23:50 +00:00
5ad975ee7a
* v2.0: token-optimization overhaul Additive (backward-compatible flags): - file_read: skeleton mode, strip (imports/license/block_comments), compact_line_numbers, 8-char etag with prefix-match compat - ast_query: format=verbose|compact|location, pagination cursor - file_search: cluster mode, pagination cursor - lsp_query (references): compact output Breaking (v2): - Preambles removed; opt-in verbose=true restores - edit_apply: response=count|diff|none, default count - ping tool removed - symbol_at/find_definition/find_references merged into lsp_query - Tool descriptions trimmed -83%, help moved to filepuff://help/<tool> - Batch file_read dedups by etag Protocol: - ResourceLink returned for file_read >64 KiB (force_inline override) - OnAfterInitialize hook reads capabilities.experimental.filepuff for session defaults (default_format, default_max_results, default_cluster, compact_refs, line_numbers, resource_link_threshold) * fix: drop --max-total-count from ripgrep args The flag does not exist in stable ripgrep (confirmed up to 15.1.0 -- "unrecognized flag --max-total-count, similar flags that are available: --max-count"). Every file_search call failed on hosts with stock rg. --max-count is per-file, not a drop-in replacement, so rely on the in-process truncation in parseOutput that was already the documented safety net.
572 lines
16 KiB
Go
572 lines
16 KiB
Go
// Package query implements a hybrid AST query language with pattern matching.
|
|
package query
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/lukaszraczylo/mcp-filepuff/internal/parser"
|
|
"github.com/lukaszraczylo/mcp-filepuff/internal/util"
|
|
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
|
|
sitter "github.com/smacker/go-tree-sitter"
|
|
)
|
|
|
|
// ASTQuery defines a query for matching AST patterns.
|
|
type ASTQuery struct {
|
|
Pattern string `json:"pattern"` // code pattern with $VAR placeholders
|
|
Language string `json:"language"` // required
|
|
Filters QueryFilters `json:"filters,omitempty"`
|
|
}
|
|
|
|
// QueryFilters provide additional filtering criteria.
|
|
type QueryFilters struct {
|
|
NameMatches string `json:"name_matches,omitempty"`
|
|
NameExact string `json:"name_exact,omitempty"`
|
|
InFile string `json:"in_file,omitempty"`
|
|
NotInFile string `json:"not_in_file,omitempty"`
|
|
KindIn []string `json:"kind_in,omitempty"`
|
|
}
|
|
|
|
// MatchResult represents a single match from a query.
|
|
type MatchResult struct {
|
|
Node *sitter.Node
|
|
Captures map[string]CapturedNode
|
|
File string
|
|
Text string
|
|
Location protocol.Location
|
|
}
|
|
|
|
// CapturedNode represents a captured node or nodes.
|
|
type CapturedNode struct {
|
|
Text string
|
|
Nodes []*sitter.Node
|
|
}
|
|
|
|
// CaptureType indicates the type of capture.
|
|
type CaptureType int
|
|
|
|
const (
|
|
CaptureSingle CaptureType = iota // $NAME - single node
|
|
CaptureMultiple // $$$NAME - multiple nodes
|
|
CaptureWildcard // $_ - wildcard (don't capture)
|
|
)
|
|
|
|
// Capture represents a placeholder in a pattern.
|
|
type Capture struct {
|
|
Name string
|
|
Type CaptureType
|
|
Position int // position in the pattern
|
|
}
|
|
|
|
// ParsedPattern represents a parsed code pattern.
|
|
type ParsedPattern struct {
|
|
Original string
|
|
Template string
|
|
Captures []Capture
|
|
}
|
|
|
|
// Matcher performs AST pattern matching.
|
|
type Matcher struct {
|
|
registry *parser.Registry
|
|
}
|
|
|
|
// NewMatcher creates a new pattern matcher.
|
|
func NewMatcher(registry *parser.Registry) *Matcher {
|
|
return &Matcher{registry: registry}
|
|
}
|
|
|
|
// ParsePattern parses a pattern string and extracts captures.
|
|
func ParsePattern(pattern string) (*ParsedPattern, error) {
|
|
if pattern == "" {
|
|
return nil, fmt.Errorf("empty pattern")
|
|
}
|
|
|
|
var captures []Capture
|
|
template := pattern
|
|
captureID := 0
|
|
|
|
// Find all captures: $$$ (multi), $_ (wildcard), $NAME (single)
|
|
// Order matters: check $$$ first
|
|
multiRe := regexp.MustCompile(`\$\$\$([A-Za-z_][A-Za-z0-9_]*)`)
|
|
wildcardRe := regexp.MustCompile(`\$_`)
|
|
singleRe := regexp.MustCompile(`\$([A-Za-z_][A-Za-z0-9_]*)`)
|
|
|
|
// Extract multi-node captures ($$$NAME)
|
|
for _, match := range multiRe.FindAllStringSubmatchIndex(pattern, -1) {
|
|
name := pattern[match[2]:match[3]]
|
|
captures = append(captures, Capture{
|
|
Name: name,
|
|
Type: CaptureMultiple,
|
|
Position: match[0],
|
|
})
|
|
}
|
|
|
|
// Replace multi-captures with placeholder identifiers
|
|
template = multiRe.ReplaceAllStringFunc(template, func(s string) string {
|
|
captureID++
|
|
return fmt.Sprintf("__multi_%d__", captureID)
|
|
})
|
|
|
|
// Extract wildcards ($_)
|
|
for _, match := range wildcardRe.FindAllStringIndex(pattern, -1) {
|
|
captures = append(captures, Capture{
|
|
Name: "_",
|
|
Type: CaptureWildcard,
|
|
Position: match[0],
|
|
})
|
|
}
|
|
|
|
// Replace wildcards with placeholder identifiers
|
|
template = wildcardRe.ReplaceAllStringFunc(template, func(s string) string {
|
|
captureID++
|
|
return fmt.Sprintf("__wild_%d__", captureID)
|
|
})
|
|
|
|
// Extract single-node captures ($NAME) - exclude those that are part of $$$NAME
|
|
// Check which $NAME patterns are not preceded by $$
|
|
remaining := template
|
|
for _, match := range singleRe.FindAllStringSubmatchIndex(remaining, -1) {
|
|
name := remaining[match[2]:match[3]]
|
|
// Skip if this looks like our placeholder
|
|
if strings.HasPrefix(name, "_multi_") || strings.HasPrefix(name, "_wild_") {
|
|
continue
|
|
}
|
|
captures = append(captures, Capture{
|
|
Name: name,
|
|
Type: CaptureSingle,
|
|
Position: match[0],
|
|
})
|
|
}
|
|
|
|
// Replace single captures with placeholder identifiers
|
|
template = singleRe.ReplaceAllStringFunc(template, func(s string) string {
|
|
name := strings.TrimPrefix(s, "$")
|
|
if strings.HasPrefix(name, "_multi_") || strings.HasPrefix(name, "_wild_") {
|
|
return s // keep our placeholders as is
|
|
}
|
|
captureID++
|
|
return fmt.Sprintf("__single_%d__", captureID)
|
|
})
|
|
|
|
return &ParsedPattern{
|
|
Original: pattern,
|
|
Captures: captures,
|
|
Template: template,
|
|
}, nil
|
|
}
|
|
|
|
// Match executes a query against a parsed tree.
|
|
func (m *Matcher) Match(ctx context.Context, query *ASTQuery, tree *sitter.Tree, content []byte, filename string) ([]MatchResult, error) {
|
|
if query.Pattern == "" {
|
|
return nil, fmt.Errorf("query pattern is required")
|
|
}
|
|
|
|
lang := protocol.Language(query.Language)
|
|
if lang == "" || lang == protocol.LangUnknown {
|
|
return nil, fmt.Errorf("valid language is required")
|
|
}
|
|
|
|
// Parse the pattern
|
|
parsed, err := ParsePattern(query.Pattern)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid pattern: %w", err)
|
|
}
|
|
|
|
var results []MatchResult
|
|
|
|
// Walk the tree and find matches
|
|
root := tree.RootNode()
|
|
if root == nil {
|
|
return results, nil
|
|
}
|
|
|
|
parser.WalkTree(root, func(n *sitter.Node) bool {
|
|
// Check for context cancellation
|
|
select {
|
|
case <-ctx.Done():
|
|
return false
|
|
default:
|
|
}
|
|
|
|
// Try to match this node against the pattern
|
|
if matched, captures := matchNode(n, parsed, content); matched {
|
|
// Apply filters
|
|
if !passesFilters(n, query.Filters, content) {
|
|
return true // continue walking
|
|
}
|
|
|
|
startPoint := n.StartPoint()
|
|
results = append(results, MatchResult{
|
|
Node: n,
|
|
Captures: captures,
|
|
File: filename,
|
|
Location: protocol.Location{
|
|
Line: int(startPoint.Row) + 1,
|
|
Column: int(startPoint.Column) + 1,
|
|
},
|
|
Text: parser.GetNodeText(n, content),
|
|
})
|
|
}
|
|
|
|
return true
|
|
})
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// matchNode attempts to match a node against a parsed pattern.
|
|
// This is a simplified matcher that looks for structural similarity.
|
|
func matchNode(node *sitter.Node, pattern *ParsedPattern, content []byte) (bool, map[string]CapturedNode) {
|
|
if node == nil {
|
|
return false, nil
|
|
}
|
|
|
|
captures := make(map[string]CapturedNode)
|
|
|
|
// Use pattern keyword matching as a heuristic to find matching nodes
|
|
// A full implementation would parse both pattern and node and compare AST structure
|
|
matched := matchPatternHeuristic(node, pattern, content, captures)
|
|
|
|
return matched, captures
|
|
}
|
|
|
|
// matchPatternHeuristic uses heuristics to match patterns.
|
|
// This is a simplified implementation that matches based on node type and structure.
|
|
func matchPatternHeuristic(node *sitter.Node, pattern *ParsedPattern, content []byte, captures map[string]CapturedNode) bool {
|
|
patternLower := strings.ToLower(pattern.Original)
|
|
nodeType := node.Type()
|
|
|
|
// Match function patterns
|
|
if strings.Contains(patternLower, "func ") || strings.Contains(patternLower, "function ") {
|
|
if nodeType != "function_declaration" && nodeType != "method_declaration" && nodeType != "function_definition" {
|
|
return false
|
|
}
|
|
extractFunctionCaptures(node, pattern.Captures, content, captures)
|
|
return true
|
|
}
|
|
|
|
// Match class patterns
|
|
if strings.Contains(patternLower, "class ") {
|
|
if nodeType != "class_declaration" && nodeType != "class_definition" {
|
|
return false
|
|
}
|
|
extractClassCaptures(node, pattern.Captures, content, captures)
|
|
return true
|
|
}
|
|
|
|
// Match struct patterns (Go, C, C++)
|
|
if (strings.Contains(patternLower, "struct ") || strings.Contains(patternLower, "type ")) && strings.Contains(patternLower, "struct") {
|
|
if nodeType != "type_declaration" && nodeType != "struct_specifier" {
|
|
return false
|
|
}
|
|
extractStructCaptures(node, pattern.Captures, content, captures)
|
|
return true
|
|
}
|
|
|
|
// Match interface patterns (Go, TypeScript)
|
|
if strings.Contains(patternLower, "interface ") {
|
|
if nodeType != "interface_declaration" && nodeType != "type_declaration" {
|
|
return false
|
|
}
|
|
extractInterfaceCaptures(node, pattern.Captures, content, captures)
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// extractFunctionCaptures extracts captures from a function node.
|
|
func extractFunctionCaptures(node *sitter.Node, capturesDef []Capture, content []byte, captures map[string]CapturedNode) {
|
|
for _, cap := range capturesDef {
|
|
switch cap.Name {
|
|
case "NAME", "name":
|
|
if nameNode := node.ChildByFieldName("name"); nameNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{nameNode},
|
|
Text: parser.GetNodeText(nameNode, content),
|
|
}
|
|
}
|
|
case "ARGS", "args", "PARAMS", "params":
|
|
if paramsNode := node.ChildByFieldName("parameters"); paramsNode != nil {
|
|
var paramNodes []*sitter.Node
|
|
for i := 0; i < int(paramsNode.NamedChildCount()); i++ {
|
|
paramNodes = append(paramNodes, paramsNode.NamedChild(i))
|
|
}
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: paramNodes,
|
|
Text: parser.GetNodeText(paramsNode, content),
|
|
}
|
|
}
|
|
case "BODY", "body":
|
|
if bodyNode := node.ChildByFieldName("body"); bodyNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{bodyNode},
|
|
Text: parser.GetNodeText(bodyNode, content),
|
|
}
|
|
}
|
|
case "RETURN", "return", "RESULT", "result":
|
|
if resultNode := node.ChildByFieldName("result"); resultNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{resultNode},
|
|
Text: parser.GetNodeText(resultNode, content),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// extractClassCaptures extracts captures from a class node.
|
|
func extractClassCaptures(node *sitter.Node, capturesDef []Capture, content []byte, captures map[string]CapturedNode) {
|
|
for _, cap := range capturesDef {
|
|
switch cap.Name {
|
|
case "NAME", "name":
|
|
if nameNode := node.ChildByFieldName("name"); nameNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{nameNode},
|
|
Text: parser.GetNodeText(nameNode, content),
|
|
}
|
|
}
|
|
case "BODY", "body":
|
|
if bodyNode := node.ChildByFieldName("body"); bodyNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{bodyNode},
|
|
Text: parser.GetNodeText(bodyNode, content),
|
|
}
|
|
}
|
|
case "EXTENDS", "extends", "SUPERCLASS", "superclass":
|
|
if extendsNode := node.ChildByFieldName("superclass"); extendsNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{extendsNode},
|
|
Text: parser.GetNodeText(extendsNode, content),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// extractStructCaptures extracts captures from a struct node.
|
|
func extractStructCaptures(node *sitter.Node, capturesDef []Capture, content []byte, captures map[string]CapturedNode) {
|
|
for _, cap := range capturesDef {
|
|
switch cap.Name {
|
|
case "NAME", "name":
|
|
// For Go type_declaration, we need to look at the type_spec child
|
|
if node.Type() == "type_declaration" {
|
|
for i := 0; i < int(node.NamedChildCount()); i++ {
|
|
child := node.NamedChild(i)
|
|
if child != nil && child.Type() == "type_spec" {
|
|
if nameNode := child.ChildByFieldName("name"); nameNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{nameNode},
|
|
Text: parser.GetNodeText(nameNode, content),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if nameNode := node.ChildByFieldName("name"); nameNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{nameNode},
|
|
Text: parser.GetNodeText(nameNode, content),
|
|
}
|
|
}
|
|
case "FIELDS", "fields", "BODY", "body":
|
|
if bodyNode := node.ChildByFieldName("body"); bodyNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{bodyNode},
|
|
Text: parser.GetNodeText(bodyNode, content),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// extractInterfaceCaptures extracts captures from an interface node.
|
|
func extractInterfaceCaptures(node *sitter.Node, capturesDef []Capture, content []byte, captures map[string]CapturedNode) {
|
|
for _, cap := range capturesDef {
|
|
switch cap.Name {
|
|
case "NAME", "name":
|
|
if nameNode := node.ChildByFieldName("name"); nameNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{nameNode},
|
|
Text: parser.GetNodeText(nameNode, content),
|
|
}
|
|
}
|
|
case "BODY", "body", "METHODS", "methods":
|
|
if bodyNode := node.ChildByFieldName("body"); bodyNode != nil {
|
|
captures[cap.Name] = CapturedNode{
|
|
Nodes: []*sitter.Node{bodyNode},
|
|
Text: parser.GetNodeText(bodyNode, content),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// passesFilters checks if a node passes all the specified filters.
|
|
func passesFilters(node *sitter.Node, filters QueryFilters, content []byte) bool {
|
|
// Name regex filter (uses cached compilation)
|
|
if filters.NameMatches != "" {
|
|
nameNode := node.ChildByFieldName("name")
|
|
if nameNode == nil {
|
|
return false
|
|
}
|
|
name := parser.GetNodeText(nameNode, content)
|
|
re, err := util.CompileRegex(filters.NameMatches)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
if !re.MatchString(name) {
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Exact name filter
|
|
if filters.NameExact != "" {
|
|
nameNode := node.ChildByFieldName("name")
|
|
if nameNode == nil {
|
|
return false
|
|
}
|
|
name := parser.GetNodeText(nameNode, content)
|
|
if name != filters.NameExact {
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Kind filter
|
|
if len(filters.KindIn) > 0 {
|
|
nodeType := node.Type()
|
|
found := false
|
|
for _, kind := range filters.KindIn {
|
|
if nodeType == kind {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// FormatResults formats match results for display (backward-compat wrapper, verbose mode).
|
|
func FormatResults(results []MatchResult, maxResults int) string {
|
|
return FormatResultsWithOptions(results, maxResults, "verbose", 0)
|
|
}
|
|
|
|
// FormatResultsWithOptions formats match results with configurable output format.
|
|
// format: "verbose" (default) | "compact" | "location"
|
|
// offset: skip this many results before rendering (used for cursor pagination).
|
|
// verbose: opt-in variadic — pass true to restore "Found N match(es):" preamble (v1 behaviour).
|
|
func FormatResultsWithOptions(results []MatchResult, maxResults int, format string, offset int, verbose ...bool) string {
|
|
if len(results) == 0 {
|
|
return "No matches found."
|
|
}
|
|
|
|
// Apply offset (pagination skip).
|
|
if offset > 0 {
|
|
if offset >= len(results) {
|
|
return "No matches found."
|
|
}
|
|
results = results[offset:]
|
|
}
|
|
|
|
// Determine how many to render and whether more remain.
|
|
renderCount := len(results)
|
|
remaining := 0
|
|
if maxResults > 0 && renderCount > maxResults {
|
|
remaining = renderCount - maxResults
|
|
renderCount = maxResults
|
|
}
|
|
|
|
var sb strings.Builder
|
|
|
|
// Emit preamble only when verbose=true is explicitly passed (opt-in, default off).
|
|
wantVerbose := len(verbose) > 0 && verbose[0]
|
|
if wantVerbose {
|
|
sb.WriteString(fmt.Sprintf("Found %d match(es):\n", renderCount))
|
|
}
|
|
|
|
switch format {
|
|
case "compact":
|
|
for i := 0; i < renderCount; i++ {
|
|
r := results[i]
|
|
nodeType := "unknown"
|
|
if r.Node != nil {
|
|
nodeType = r.Node.Type()
|
|
}
|
|
firstLine := firstLineOf(r.Text, 80)
|
|
sb.WriteString(fmt.Sprintf("%s:%d (%s) %s\n", r.File, r.Location.Line, nodeType, firstLine))
|
|
}
|
|
|
|
case "location":
|
|
for i := 0; i < renderCount; i++ {
|
|
r := results[i]
|
|
sb.WriteString(fmt.Sprintf("%s:%d\n", r.File, r.Location.Line))
|
|
}
|
|
|
|
default: // "verbose"
|
|
for i := 0; i < renderCount; i++ {
|
|
r := results[i]
|
|
nodeType := "unknown"
|
|
if r.Node != nil {
|
|
nodeType = r.Node.Type()
|
|
}
|
|
sb.WriteString(fmt.Sprintf("**%s:%d** (%s)\n", r.File, r.Location.Line, nodeType))
|
|
|
|
// Truncate very long text
|
|
text := r.Text
|
|
if len(text) > 500 {
|
|
text = text[:500] + "..."
|
|
}
|
|
sb.WriteString("```\n")
|
|
sb.WriteString(text)
|
|
sb.WriteString("\n```\n")
|
|
|
|
// Show captures
|
|
if len(r.Captures) > 0 {
|
|
sb.WriteString("Captures: ")
|
|
first := true
|
|
for name, cap := range r.Captures {
|
|
if !first {
|
|
sb.WriteString(", ")
|
|
}
|
|
first = false
|
|
capText := cap.Text
|
|
if len(capText) > 50 {
|
|
capText = capText[:50] + "..."
|
|
}
|
|
sb.WriteString(fmt.Sprintf("$%s=%s", name, capText))
|
|
}
|
|
sb.WriteString("\n")
|
|
}
|
|
sb.WriteString("\n")
|
|
}
|
|
}
|
|
|
|
if remaining > 0 {
|
|
// Caller must embed the cursor token; we just append the remaining count hint.
|
|
// The actual [cursor: ...] line is written by the handler after calling MakeCursor.
|
|
sb.WriteString(fmt.Sprintf("[remaining: %d]\n", remaining))
|
|
}
|
|
|
|
return sb.String()
|
|
}
|
|
|
|
// firstLineOf returns the first non-empty line of s, trimmed and capped at maxLen chars.
|
|
func firstLineOf(s string, maxLen int) string {
|
|
for _, line := range strings.Split(s, "\n") {
|
|
line = strings.TrimSpace(line)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
if len(line) > maxLen {
|
|
return line[:maxLen-1] + "…"
|
|
}
|
|
return line
|
|
}
|
|
return ""
|
|
}
|