mirror of
https://github.com/lukaszraczylo/filepuff-mcp.git
synced 2026-06-08 22:49:14 +00:00
9af2801b1b
- [x] Remove auto-indentation from text mode edits (caller controls whitespace) - [x] Add line-ending detection and normalization for both AST and text modes - [x] Share edit logic via new `spliceContent` function for both modes - [x] Fix diff to emit "No newline at end of file" markers - [x] Fix diff to strip raw CR from CRLF file output - [x] Remove double-unescape of backslash sequences in new_content - [x] Fix countDiffLines to be hunk-aware (correctly count lines starting with +/-) - [x] Fix block-comment stripping to remove standalone lines cleanly - [x] Fix Python license header stripping to preserve separator blank lines
348 lines
9.4 KiB
Go
348 lines
9.4 KiB
Go
package parser
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
|
|
)
|
|
|
|
// StripFlag identifies one category of content that StripContent can remove.
type StripFlag string

// Recognized strip categories.
const (
	// StripImports selects import declarations.
	StripImports StripFlag = "imports"
	// StripLicense selects a leading license header comment.
	StripLicense StripFlag = "license"
	// StripBlockComments selects block comments (docstrings for Python).
	StripBlockComments StripFlag = "block_comments"
)
|
|
|
|
// StripResult holds the stripped content and which flags actually removed content.
|
|
type StripResult struct {
|
|
Content string
|
|
Stripped []StripFlag
|
|
}
|
|
|
|
// StripContent applies requested strip operations to content, in order:
|
|
// license → imports → block_comments.
|
|
// lang is used to pick language-specific heuristics.
|
|
func StripContent(content string, flags []StripFlag, lang protocol.Language) StripResult {
|
|
flagSet := make(map[StripFlag]bool, len(flags))
|
|
for _, f := range flags {
|
|
flagSet[f] = true
|
|
}
|
|
|
|
var stripped []StripFlag
|
|
|
|
if flagSet[StripLicense] {
|
|
next, removed := stripLicense(content)
|
|
if removed {
|
|
content = next
|
|
stripped = append(stripped, StripLicense)
|
|
}
|
|
}
|
|
|
|
if flagSet[StripImports] {
|
|
next, removed := stripImports(content, lang)
|
|
if removed {
|
|
content = next
|
|
stripped = append(stripped, StripImports)
|
|
}
|
|
}
|
|
|
|
if flagSet[StripBlockComments] {
|
|
next, removed := stripBlockComments(content, lang)
|
|
if removed {
|
|
content = next
|
|
stripped = append(stripped, StripBlockComments)
|
|
}
|
|
}
|
|
|
|
return StripResult{Content: content, Stripped: stripped}
|
|
}
|
|
|
|
// stripLicense removes a leading comment block that looks like a license header.
// After skipping leading whitespace, two header shapes are recognized:
//   - a C-style /* ... */ block comment, and
//   - a run of contiguous lines that start with "#".
//
// A header qualifies when its text contains "copyright" or "license"
// (case-insensitive; this also covers "SPDX-License-Identifier"). A first line that
// starts with "#!" is treated as a shebang, not as part of the header: it is kept in
// the output and is not searched for license keywords.
//
// It returns the remaining content and true when a header was removed, or the original
// content and false otherwise.
func stripLicense(content string) (string, bool) {
	trimmed := strings.TrimLeft(content, " \t\n\r")

	// C-style block comment at the top of the file.
	if strings.HasPrefix(trimmed, "/*") {
		// Search for the closer after the opener so "/*/" is not mistaken for a
		// complete comment.
		if end := strings.Index(trimmed[2:], "*/"); end >= 0 {
			stop := 2 + end + 2
			if looksLikeLicense(trimmed[:stop]) {
				// Consume the newline(s) that followed the comment.
				return strings.TrimLeft(trimmed[stop:], "\r\n"), true
			}
		}
		return content, false
	}

	// Hash-style leading comment block. Only contiguous "#" lines belong to the header;
	// a blank line ends it and is preserved as a separator.
	if strings.HasPrefix(trimmed, "#") {
		lines := strings.Split(trimmed, "\n")
		start := 0
		if strings.HasPrefix(lines[0], "#!") {
			start = 1 // keep the shebang
		}
		end := start
		for end < len(lines) && strings.HasPrefix(lines[end], "#") {
			end++
		}
		if end > start && looksLikeLicense(strings.Join(lines[start:end], "\n")) {
			kept := make([]string, 0, start+len(lines)-end)
			kept = append(kept, lines[:start]...)
			kept = append(kept, lines[end:]...)
			return strings.Join(kept, "\n"), true
		}
	}

	return content, false
}

// looksLikeLicense reports whether comment text mentions a copyright or a license.
func looksLikeLicense(text string) bool {
	lower := strings.ToLower(text)
	return strings.Contains(lower, "copyright") || strings.Contains(lower, "license")
}
|
|
|
|
// stripImports removes top-of-file import blocks, language-specific.
|
|
func stripImports(content string, lang protocol.Language) (string, bool) {
|
|
switch lang {
|
|
case protocol.LangGo:
|
|
return stripGoImports(content)
|
|
case protocol.LangTypeScript, protocol.LangJavaScript:
|
|
return stripTSImports(content)
|
|
case protocol.LangPython:
|
|
return stripPythonImports(content)
|
|
case protocol.LangRust:
|
|
return stripRustImports(content)
|
|
default:
|
|
return content, false
|
|
}
|
|
}
|
|
|
|
// stripGoImports removes Go import declarations, working line by line:
//   - parenthesized groups, whether spread over several lines or closed on the opening
//     line (`import ("fmt")`); one blank line directly after a group is dropped too;
//   - single imports, with or without an alias (`import "fmt"`, `import _ "embed"`,
//     `import log "example.com/log"`).
//
// The scan is textual, so import-looking lines inside raw strings or block comments
// are treated the same way. It returns the original content and false when nothing
// was removed.
func stripGoImports(content string) (string, bool) {
	lines := strings.Split(content, "\n")
	out := make([]string, 0, len(lines))
	removed := false
	i := 0
	for i < len(lines) {
		trimLine := strings.TrimSpace(lines[i])
		if strings.HasPrefix(trimLine, "import") {
			spec := trimLine[len("import"):]
			if group := strings.TrimLeft(spec, " \t"); strings.HasPrefix(group, "(") {
				removed = true
				i = skipGoImportGroup(lines, i, group[1:])
				// Skip one blank line after the group.
				if i < len(lines) && strings.TrimSpace(lines[i]) == "" {
					i++
				}
				continue
			}
			if isGoImportSpec(spec) {
				removed = true
				i++
				continue
			}
		}
		out = append(out, lines[i])
		i++
	}
	if !removed {
		return content, false
	}
	return strings.Join(out, "\n"), true
}

// skipGoImportGroup returns the index of the first line after the parenthesized import
// group opened on lines[start]. rest is the text of the opening line after its "(".
// If the group is never closed, len(lines) is returned.
func skipGoImportGroup(lines []string, start int, rest string) int {
	next := start + 1
	// Ignore a trailing line comment so a ")" inside it does not count as the closer.
	if c := strings.Index(rest, "//"); c >= 0 {
		rest = rest[:c]
	}
	if strings.Contains(rest, ")") {
		return next // group opened and closed on one line
	}
	for next < len(lines) {
		// Prefix match so a closer followed by a comment (") // end") is recognized.
		closed := strings.HasPrefix(strings.TrimSpace(lines[next]), ")")
		next++
		if closed {
			break
		}
	}
	return next
}

// isGoImportSpec reports whether afterKeyword, the text following the "import"
// keyword on a line, is a single import spec: a quoted path, optionally preceded by an
// alias, "_" or ".".
func isGoImportSpec(afterKeyword string) bool {
	spec := strings.TrimLeft(afterKeyword, " \t")
	if spec == "" {
		return false
	}
	if spec[0] == '"' || spec[0] == '`' {
		return true
	}
	if len(spec) == len(afterKeyword) {
		return false // e.g. "importer": no separator after the keyword
	}
	sep := strings.IndexAny(spec, " \t")
	if sep < 0 || !isGoImportAlias(spec[:sep]) {
		return false
	}
	path := strings.TrimLeft(spec[sep:], " \t")
	if path == "" || (path[0] != '"' && path[0] != '`') {
		return false
	}
	closeAt := strings.IndexByte(path[1:], path[0])
	if closeAt < 0 {
		return false
	}
	// Only a statement terminator or a comment may follow the path; this keeps prose
	// such as "import the `x` package" from being mistaken for an aliased import.
	tail := strings.TrimSpace(path[closeAt+2:])
	return tail == "" ||
		strings.HasPrefix(tail, ";") ||
		strings.HasPrefix(tail, "//") ||
		strings.HasPrefix(tail, "/*")
}

// isGoImportAlias reports whether name can stand before an import path: ".", "_", or
// an identifier. Non-ASCII bytes are accepted without further classification.
func isGoImportAlias(name string) bool {
	if name == "." || name == "_" {
		return true
	}
	for i := 0; i < len(name); i++ {
		c := name[i]
		switch {
		case c == '_' || c >= 0x80:
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9' && i > 0:
		default:
			return false
		}
	}
	return name != ""
}
|
|
|
|
// stripTSImports removes TypeScript/JavaScript import statements and destructuring
// require lines:
//   - any statement whose first line starts with "import ", including ones whose
//     braces span several lines (`import {` ... `} from 'x';`);
//   - single lines that start with "const {" and contain "require(".
//
// If a multi-line import never closes its braces, only its opening line is removed and
// the following lines are kept. It returns the original content and false when nothing
// was removed.
func stripTSImports(content string) (string, bool) {
	lines := strings.Split(content, "\n")
	out := make([]string, 0, len(lines))
	removed := false
	depth := 0           // unclosed "{" of the import statement being skipped
	var pending []string // continuation lines held back until the statement closes
	for _, l := range lines {
		trimLine := strings.TrimSpace(l)
		if depth > 0 {
			pending = append(pending, l)
			depth += tsBraceDelta(trimLine)
			if depth <= 0 {
				depth = 0
				pending = pending[:0]
			}
			continue
		}
		isImport := strings.HasPrefix(trimLine, "import ")
		if isImport || (strings.HasPrefix(trimLine, "const {") && strings.Contains(trimLine, "require(")) {
			removed = true
			if isImport {
				if d := tsBraceDelta(trimLine); d > 0 {
					depth = d
				}
			}
			continue
		}
		out = append(out, l)
	}
	// Non-empty only when an import never closed: keep those lines rather than
	// silently dropping the rest of the file.
	out = append(out, pending...)
	if !removed {
		return content, false
	}
	return strings.Join(out, "\n"), true
}

// tsBraceDelta returns the number of "{" minus the number of "}" on line, ignoring
// anything after a "//".
func tsBraceDelta(line string) int {
	if c := strings.Index(line, "//"); c >= 0 {
		line = line[:c]
	}
	return strings.Count(line, "{") - strings.Count(line, "}")
}
|
|
|
|
// stripPythonImports removes Python "import ..." and "from ... import ..." statements,
// including ones continued over several lines with parentheses or a trailing
// backslash.
//
// If a continued statement never ends, only its opening line is removed and the
// following lines are kept. It returns the original content and false when nothing was
// removed.
func stripPythonImports(content string) (string, bool) {
	lines := strings.Split(content, "\n")
	out := make([]string, 0, len(lines))
	removed := false
	depth := 0           // unclosed "(" of the statement being skipped
	continuing := false  // the statement being skipped extends onto the next line
	var pending []string // continuation lines held back until the statement ends
	for _, l := range lines {
		trimLine := strings.TrimSpace(l)
		if continuing {
			pending = append(pending, l)
			depth, continuing = pythonImportState(trimLine, depth)
			if !continuing {
				pending = pending[:0]
			}
			continue
		}
		if strings.HasPrefix(trimLine, "import ") || strings.HasPrefix(trimLine, "from ") {
			removed = true
			depth, continuing = pythonImportState(trimLine, 0)
			continue
		}
		out = append(out, l)
	}
	// Non-empty only when a statement never ended: keep those lines rather than
	// silently dropping the rest of the file.
	out = append(out, pending...)
	if !removed {
		return content, false
	}
	return strings.Join(out, "\n"), true
}

// pythonImportState updates the open-parenthesis depth with line (ignoring a trailing
// "#" comment) and reports whether the statement continues on the next line, either
// because a parenthesis is still open or because the line ends with a backslash.
func pythonImportState(line string, depth int) (int, bool) {
	if c := strings.IndexByte(line, '#'); c >= 0 {
		line = line[:c]
	}
	line = strings.TrimSpace(line)
	depth += strings.Count(line, "(") - strings.Count(line, ")")
	if depth < 0 {
		depth = 0
	}
	return depth, depth > 0 || strings.HasSuffix(line, "\\")
}
|
|
|
|
// stripRustImports removes Rust "use ..." declarations. A declaration starts on a line
// beginning with "use " and ends on the first line that contains ";" outside a "//"
// comment, so both `use a::b; // note` and brace lists spread over several lines are
// removed as a unit without touching the code after them.
//
// If a declaration is never terminated, only its opening line is removed and the
// following lines are kept. It returns the original content and false when nothing was
// removed.
func stripRustImports(content string) (string, bool) {
	lines := strings.Split(content, "\n")
	out := make([]string, 0, len(lines))
	removed := false
	inMulti := false
	var pending []string // continuation lines held back until the ";" is seen
	for _, l := range lines {
		trimLine := strings.TrimSpace(l)
		if !inMulti && !strings.HasPrefix(trimLine, "use ") {
			out = append(out, l)
			continue
		}
		removed = true
		if inMulti {
			pending = append(pending, l)
		}
		// Look for the terminator in code only, not in a trailing line comment.
		stmt := trimLine
		if c := strings.Index(stmt, "//"); c >= 0 {
			stmt = stmt[:c]
		}
		inMulti = !strings.Contains(stmt, ";")
		if !inMulti {
			pending = pending[:0]
		}
	}
	// Non-empty only when a declaration never ended: keep those lines rather than
	// silently dropping the rest of the file.
	out = append(out, pending...)
	if !removed {
		return content, false
	}
	return strings.Join(out, "\n"), true
}
|
|
|
|
// stripBlockComments removes /* ... */ block comments (Go/TS/C/Rust)
|
|
// and Python triple-quoted docstrings.
|
|
func stripBlockComments(content string, lang protocol.Language) (string, bool) {
|
|
if lang == protocol.LangPython {
|
|
return stripPythonDocstrings(content)
|
|
}
|
|
return stripCStyleBlockComments(content)
|
|
}
|
|
|
|
// trimTrailingLineWhitespace returns out with any trailing run of spaces and tabs cut
// off. Newlines are not whitespace here, so the cut never reaches past the start of the
// current line. Callers use it after dropping a standalone comment so that the
// comment's indentation does not survive as a whitespace-only line.
func trimTrailingLineWhitespace(out []byte) []byte {
	end := len(out)
	for end > 0 {
		if b := out[end-1]; b != ' ' && b != '\t' {
			break
		}
		end--
	}
	return out[:end]
}
|
|
|
|
// skipLineTail returns the index reached by starting at i, stepping over any run of
// spaces, tabs and carriage returns, and then over at most one line feed. It is used to
// consume what is left of a line after a standalone comment's closer, including the
// "\n" or "\r\n" terminator.
func skipLineTail(content string, i int) int {
	n := len(content)
	for i < n {
		if ch := content[i]; ch != ' ' && ch != '\t' && ch != '\r' {
			break
		}
		i++
	}
	if i < n && content[i] == '\n' {
		return i + 1
	}
	return i
}
|
|
|
|
// stripCStyleBlockComments removes /* ... */ comments. A comment that occupies a whole
|
|
// line (only whitespace before it) is removed together with that line's indentation and
|
|
// terminator; an inline comment (code precedes it) is removed in place, leaving the
|
|
// surrounding line — and crucially its terminator — intact so lines are never merged.
|
|
func stripCStyleBlockComments(content string) (string, bool) {
|
|
removed := false
|
|
out := make([]byte, 0, len(content))
|
|
lineHasNonSpace := false
|
|
i := 0
|
|
for i < len(content) {
|
|
if i+1 < len(content) && content[i] == '/' && content[i+1] == '*' {
|
|
if end := strings.Index(content[i+2:], "*/"); end >= 0 {
|
|
removed = true
|
|
standalone := !lineHasNonSpace
|
|
i = i + 2 + end + 2 // advance past closing */
|
|
if standalone {
|
|
out = trimTrailingLineWhitespace(out)
|
|
i = skipLineTail(content, i)
|
|
lineHasNonSpace = false
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
c := content[i]
|
|
switch c {
|
|
case '\n':
|
|
lineHasNonSpace = false
|
|
case ' ', '\t', '\r':
|
|
// whitespace: does not mark the line as having content
|
|
default:
|
|
lineHasNonSpace = true
|
|
}
|
|
out = append(out, c)
|
|
i++
|
|
}
|
|
if !removed {
|
|
return content, false
|
|
}
|
|
return string(out), true
|
|
}
|
|
|
|
// stripPythonDocstrings removes triple-quoted strings (""" and ”'). As with block
|
|
// comments, a standalone docstring line is removed along with its indentation and
|
|
// terminator, while an inline triple-quoted string leaves its line's terminator intact.
|
|
func stripPythonDocstrings(content string) (string, bool) {
|
|
removed := false
|
|
out := make([]byte, 0, len(content))
|
|
lineHasNonSpace := false
|
|
i := 0
|
|
for i < len(content) {
|
|
if i+2 < len(content) {
|
|
triple := content[i : i+3]
|
|
if triple == `"""` || triple == `'''` {
|
|
if end := strings.Index(content[i+3:], triple); end >= 0 {
|
|
removed = true
|
|
standalone := !lineHasNonSpace
|
|
i = i + 3 + end + 3
|
|
if standalone {
|
|
out = trimTrailingLineWhitespace(out)
|
|
i = skipLineTail(content, i)
|
|
lineHasNonSpace = false
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
c := content[i]
|
|
switch c {
|
|
case '\n':
|
|
lineHasNonSpace = false
|
|
case ' ', '\t', '\r':
|
|
// whitespace: does not mark the line as having content
|
|
default:
|
|
lineHasNonSpace = true
|
|
}
|
|
out = append(out, c)
|
|
i++
|
|
}
|
|
if !removed {
|
|
return content, false
|
|
}
|
|
return string(out), true
|
|
}
|