refactor(edit): remove auto-indentation and add line-ending normalization

- [x] Remove auto-indentation from text mode edits (caller controls whitespace)
- [x] Add line-ending detection and normalization for both AST and text modes
- [x] Share edit logic via new `spliceContent` function for both modes
- [x] Fix diff to emit "No newline at end of file" markers
- [x] Fix diff to strip raw CR from CRLF file output
- [x] Remove double-unescape of backslash sequences in new_content
- [x] Fix countDiffLines to be hunk-aware (correctly count lines starting with +/-)
- [x] Fix block-comment stripping to remove standalone lines cleanly
- [x] Fix Python license header stripping to preserve separator blank lines
This commit is contained in:
2026-05-29 00:17:36 +01:00
parent f1643e7b81
commit 9af2801b1b
9 changed files with 596 additions and 334 deletions
+183 -208
View File
@@ -481,98 +481,104 @@ func (e *Engine) matchesSelector(sel ASTSelector, n *sitter.Node, content []byte
return true
}
// applyEdit applies the edit operation to the content.
// AST mode uses exact byte positionsnew_content is inserted verbatim without auto-indentation.
// applyEdit applies an AST-mode edit. new_content is inserted verbatim — the caller
// controls indentation — and only line endings are normalized to the file's convention.
func (e *Engine) applyEdit(edit *ASTEdit, node *sitter.Node, content []byte) ([]byte, error) {
startByte := node.StartByte()
endByte := node.EndByte()
eol := detectLineEnding(content)
newContent := normalizeLineEndings(edit.NewContent, eol)
return spliceContent(edit.Operation, content, int(node.StartByte()), int(node.EndByte()), newContent, eol)
}
newContent := edit.NewContent
// detectLineEnding reports the file's dominant line-ending convention: "\r\n" when any
// CRLF terminator is present, otherwise "\n".
func detectLineEnding(content []byte) string {
if bytes.Contains(content, []byte("\r\n")) {
return "\r\n"
}
return "\n"
}
// normalizeLineEndings rewrites every line ending in s to eol. It first collapses CRLF to
// LF, then expands to the target, so mixed input becomes uniform and new_content can never
// introduce a line ending foreign to the file being edited.
func normalizeLineEndings(s, eol string) string {
if s == "" {
return s
}
s = strings.ReplaceAll(s, "\r\n", "\n")
if eol != "\n" {
s = strings.ReplaceAll(s, "\n", eol)
}
return s
}
func endsWithNewline(s string) bool { return strings.HasSuffix(s, "\n") }
func startsWithNewline(s string) bool {
return s != "" && (s[0] == '\n' || s[0] == '\r')
}
// spliceContent applies an edit operation by splicing newContent into content over the
// byte range [start, end). It is shared by AST and text modes — once auto-indentation is
// removed the two are identical. Restored terminators and separators use eol so the
// file's line-ending convention is preserved.
func spliceContent(op EditOperation, content []byte, start, end int, newContent, eol string) ([]byte, error) {
// A line-based selection on a CRLF file can land `end` between the \r (treated as
// line content) and the \n of a terminator. Pull it back so the full \r\n stays
// intact in the tail and is never split into a bare LF.
if end > start && end < len(content) && content[end-1] == '\r' && content[end] == '\n' {
end--
}
var result []byte
switch edit.Operation {
switch op {
case EditReplace:
result = append(result, content[:startByte]...)
result = append(result, []byte(newContent)...)
// Preserve trailing newline: if selection ended with \n but replacement doesn't,
// re-add it to prevent line merging
if endByte > startByte && content[endByte-1] == '\n' && !strings.HasSuffix(newContent, "\n") {
result = append(result, '\n')
result = append(result, content[:start]...)
result = append(result, newContent...)
// Restore a line terminator if the replaced range ended with one but the
// replacement does not, to prevent merging with the following line.
if end > start && content[end-1] == '\n' && !endsWithNewline(newContent) {
result = append(result, eol...)
}
result = append(result, content[endByte:]...)
result = append(result, content[end:]...)
case EditInsertBefore:
insertion := newContent
if !strings.HasSuffix(insertion, "\n") {
insertion += "\n"
if !endsWithNewline(insertion) {
insertion += eol
}
result = append(result, content[:startByte]...)
result = append(result, []byte(insertion)...)
result = append(result, content[startByte:]...)
result = append(result, content[:start]...)
result = append(result, insertion...)
result = append(result, content[start:]...)
case EditInsertAfter:
insertion := newContent
// Ensure separation from preceding content
if endByte > 0 && content[endByte-1] != '\n' && !strings.HasPrefix(insertion, "\n") {
insertion = "\n" + insertion
// Separate from preceding content.
if end > 0 && content[end-1] != '\n' && !startsWithNewline(insertion) {
insertion = eol + insertion
}
// Ensure separation from following content
if !strings.HasSuffix(insertion, "\n") && endByte < uint32(len(content)) && content[endByte] != '\n' {
insertion += "\n"
// Separate from following content.
if !endsWithNewline(insertion) && end < len(content) && content[end] != '\n' {
insertion += eol
}
result = append(result, content[:endByte]...)
result = append(result, []byte(insertion)...)
result = append(result, content[endByte:]...)
result = append(result, content[:end]...)
result = append(result, insertion...)
result = append(result, content[end:]...)
case EditDelete:
result = append(result, content[:startByte]...)
result = append(result, content[endByte:]...)
result = append(result, content[:start]...)
result = append(result, content[end:]...)
default:
return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", edit.Operation))
return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", op))
}
return result, nil
}
// detectIndentation detects the indentation at a given byte position.
func detectIndentation(content []byte, bytePos int) string {
// Find the start of the line
lineStart := bytePos
for lineStart > 0 && content[lineStart-1] != '\n' {
lineStart--
}
// Extract leading whitespace
var indent strings.Builder
for i := lineStart; i < bytePos && i < len(content); i++ {
c := content[i]
if c == ' ' || c == '\t' {
indent.WriteByte(c)
} else {
break
}
}
return indent.String()
}
// indentContent applies indentation to multi-line content.
func indentContent(content string, indent string) string {
if indent == "" {
return content
}
lines := strings.Split(content, "\n")
for i, line := range lines {
if i > 0 && line != "" {
lines[i] = indent + line
}
}
return strings.Join(lines, "\n")
}
// noNewlineMarker is the git-style annotation emitted after a diff line whose source
// version has no trailing newline.
const noNewlineMarker = "\\ No newline at end of file\n"
// diffLine represents a single line in the diff with its type and content.
type diffLine struct {
@@ -582,32 +588,56 @@ type diffLine struct {
newN int // 1-based line number in modified (0 if delete)
}
// indexRange is an inclusive [start, end] range of diffLine indices forming one hunk.
type indexRange struct{ start, end int }
// generateDiff creates a unified diff between original and modified content.
// Uses line-level Myers diff algorithm and outputs a proper unified diff
// with context lines (3 before/after each change, merging close hunks).
// Uses a line-level Myers diff and outputs a unified diff with 3 lines of context
// before/after each change, merging close hunks.
func (e *Engine) generateDiff(original, modified, filename string) string {
dmp := e.dmp
// Use line-level diffing: encode each line as a single character,
// diff the encoded strings, then decode back to real lines.
// Line-level diffing: encode each line as a single rune, diff the encoded strings,
// then decode back to real lines.
chars1, chars2, lineArray := dmp.DiffLinesToChars(original, modified)
diffs := dmp.DiffMain(chars1, chars2, false)
diffs = dmp.DiffCharsToLines(diffs, lineArray)
// Cleanup for readability
diffs := dmp.DiffCharsToLines(dmp.DiffMain(chars1, chars2, false), lineArray)
diffs = dmp.DiffCleanupSemantic(diffs)
// Flatten diffs into individual lines with line numbers
var lines []diffLine
oldLine := 1
newLine := 1
// Track whether each version lacks a final newline, so the diff is annotated
// git-style ("\ No newline at end of file") instead of implying a phantom one.
origNoEOL := len(original) > 0 && !strings.HasSuffix(original, "\n")
modNoEOL := len(modified) > 0 && !strings.HasSuffix(modified, "\n")
lines, maxOldN, maxNewN := flattenDiffLines(diffs)
ranges := diffHunkRanges(lines)
if len(ranges) == 0 {
return "" // no changes
}
var buf bytes.Buffer
fmt.Fprintf(&buf, "--- %s\n", filename)
fmt.Fprintf(&buf, "+++ %s\n", filename)
for _, r := range ranges {
oldStart, oldCount, newStart, newCount := hunkBounds(lines, r.start, r.end)
fmt.Fprintf(&buf, "@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount)
writeDiffBody(&buf, lines, r.start, r.end, origNoEOL, modNoEOL, maxOldN, maxNewN)
}
return buf.String()
}
// flattenDiffLines expands diff segments into per-line records with 1-based line numbers,
// returning the lines plus the final line number of each version (for no-newline marking).
func flattenDiffLines(diffs []diffmatchpatch.Diff) (lines []diffLine, maxOldN, maxNewN int) {
oldLine, newLine := 1, 1
for _, d := range diffs {
rawLines := strings.SplitAfter(d.Text, "\n")
for _, raw := range rawLines {
for _, raw := range strings.SplitAfter(d.Text, "\n") {
if raw == "" {
continue
}
text := strings.TrimSuffix(raw, "\n")
// Strip the terminator for display; also drop a trailing CR so CRLF files
// do not leak raw carriage returns into the rendered diff.
text := strings.TrimSuffix(strings.TrimSuffix(raw, "\n"), "\r")
switch d.Type {
case diffmatchpatch.DiffEqual:
lines = append(lines, diffLine{op: d.Type, text: text, oldN: oldLine, newN: newLine})
@@ -622,97 +652,88 @@ func (e *Engine) generateDiff(original, modified, filename string) string {
}
}
}
return lines, oldLine - 1, newLine - 1
}
// Identify indices of changed lines
// diffHunkRanges returns the inclusive index ranges to emit: each changed line padded by
// 3 lines of context, with overlapping/adjacent ranges merged.
func diffHunkRanges(lines []diffLine) []indexRange {
const contextSize = 3
var changedIndices []int
for i, l := range lines {
if l.op != diffmatchpatch.DiffEqual {
changedIndices = append(changedIndices, i)
}
}
if len(changedIndices) == 0 {
return "" // no changes
}
// Build inclusion ranges: for each changed line, include contextSize lines before/after.
// Merge overlapping or adjacent ranges (gap <= 2*contextSize = 6 context lines).
type indexRange struct{ start, end int } // inclusive
var ranges []indexRange
for _, ci := range changedIndices {
rStart := ci - contextSize
if rStart < 0 {
rStart = 0
}
rEnd := ci + contextSize
if rEnd >= len(lines) {
rEnd = len(lines) - 1
for i, l := range lines {
if l.op == diffmatchpatch.DiffEqual {
continue
}
rStart := max(i-contextSize, 0)
rEnd := min(i+contextSize, len(lines)-1)
if len(ranges) > 0 && rStart <= ranges[len(ranges)-1].end+1 {
// Merge with previous range
ranges[len(ranges)-1].end = rEnd
ranges[len(ranges)-1].end = rEnd // merge with previous
} else {
ranges = append(ranges, indexRange{rStart, rEnd})
}
}
return ranges
}
// Emit unified diff
var buf bytes.Buffer
buf.WriteString(fmt.Sprintf("--- %s\n", filename))
buf.WriteString(fmt.Sprintf("+++ %s\n", filename))
for _, r := range ranges {
// Determine hunk header line numbers
var oldStart, oldCount, newStart, newCount int
for i := r.start; i <= r.end; i++ {
l := lines[i]
switch l.op {
case diffmatchpatch.DiffEqual:
if oldCount == 0 {
oldStart = l.oldN
}
if newCount == 0 {
newStart = l.newN
}
oldCount++
newCount++
case diffmatchpatch.DiffDelete:
if oldCount == 0 {
oldStart = l.oldN
}
if newCount == 0 {
// Set newStart from context or next available
newStart = l.oldN // approximate
}
oldCount++
case diffmatchpatch.DiffInsert:
if newCount == 0 {
newStart = l.newN
}
if oldCount == 0 {
oldStart = l.newN // approximate
}
newCount++
// hunkBounds computes the unified-diff hunk header line numbers and counts for
// lines[start:end+1]. newStart/oldStart for one-sided lines are approximate.
func hunkBounds(lines []diffLine, start, end int) (oldStart, oldCount, newStart, newCount int) {
for i := start; i <= end; i++ {
l := lines[i]
switch l.op {
case diffmatchpatch.DiffEqual:
if oldCount == 0 {
oldStart = l.oldN
}
if newCount == 0 {
newStart = l.newN
}
oldCount++
newCount++
case diffmatchpatch.DiffDelete:
if oldCount == 0 {
oldStart = l.oldN
}
if newCount == 0 {
newStart = l.oldN // approximate
}
oldCount++
case diffmatchpatch.DiffInsert:
if newCount == 0 {
newStart = l.newN
}
if oldCount == 0 {
oldStart = l.newN // approximate
}
newCount++
}
}
return
}
buf.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount))
for i := r.start; i <= r.end; i++ {
l := lines[i]
switch l.op {
case diffmatchpatch.DiffEqual:
buf.WriteString(fmt.Sprintf(" %s\n", l.text))
case diffmatchpatch.DiffDelete:
buf.WriteString(fmt.Sprintf("-%s\n", l.text))
case diffmatchpatch.DiffInsert:
buf.WriteString(fmt.Sprintf("+%s\n", l.text))
// writeDiffBody writes the space/-/+ body lines for one hunk, appending the git-style
// no-newline marker after the final line of any version that lacks a trailing newline.
func writeDiffBody(buf *bytes.Buffer, lines []diffLine, start, end int, origNoEOL, modNoEOL bool, maxOldN, maxNewN int) {
for i := start; i <= end; i++ {
l := lines[i]
switch l.op {
case diffmatchpatch.DiffEqual:
fmt.Fprintf(buf, " %s\n", l.text)
if (origNoEOL && l.oldN == maxOldN) || (modNoEOL && l.newN == maxNewN) {
buf.WriteString(noNewlineMarker)
}
case diffmatchpatch.DiffDelete:
fmt.Fprintf(buf, "-%s\n", l.text)
if origNoEOL && l.oldN == maxOldN {
buf.WriteString(noNewlineMarker)
}
case diffmatchpatch.DiffInsert:
fmt.Fprintf(buf, "+%s\n", l.text)
if modNoEOL && l.newN == maxNewN {
buf.WriteString(noNewlineMarker)
}
}
}
return buf.String()
}
// resolveTextSelector finds the byte range for a text-based selection.
@@ -831,57 +852,11 @@ func (e *Engine) findLineRange(content []byte, lineStart, lineEnd int) (start, e
return start, end, nil
}
// applyTextEditOperation applies a text edit operation.
// applyTextEditOperation applies a text-mode edit. Like AST mode, new_content is inserted
// verbatim (no auto-indentation) with its line endings normalized to the file's convention.
func (e *Engine) applyTextEditOperation(op EditOperation, content []byte, start, end int, newContent string) ([]byte, error) {
// Detect indentation at the selection point
indentation := detectIndentation(content, start)
indentedContent := indentContent(newContent, indentation)
var result []byte
switch op {
case EditReplace:
result = append(result, content[:start]...)
result = append(result, []byte(indentedContent)...)
// Preserve trailing newline: if selection ended with \n but replacement doesn't,
// re-add it to prevent line merging
if end > start && content[end-1] == '\n' && !strings.HasSuffix(indentedContent, "\n") {
result = append(result, '\n')
}
result = append(result, content[end:]...)
case EditInsertBefore:
insertion := indentedContent
if !strings.HasSuffix(insertion, "\n") {
insertion += "\n"
}
result = append(result, content[:start]...)
result = append(result, []byte(insertion)...)
result = append(result, content[start:]...)
case EditInsertAfter:
insertion := indentedContent
// Ensure separation from preceding content
if end > 0 && content[end-1] != '\n' && !strings.HasPrefix(insertion, "\n") {
insertion = "\n" + insertion
}
// Ensure separation from following content
if !strings.HasSuffix(insertion, "\n") && end < len(content) && content[end] != '\n' {
insertion += "\n"
}
result = append(result, content[:end]...)
result = append(result, []byte(insertion)...)
result = append(result, content[end:]...)
case EditDelete:
result = append(result, content[:start]...)
result = append(result, content[end:]...)
default:
return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", op))
}
return result, nil
eol := detectLineEnding(content)
return spliceContent(op, content, start, end, normalizeLineEndings(newContent, eol), eol)
}
// truncateString truncates a string to maxLen with ellipsis.
-37
View File
@@ -353,43 +353,6 @@ func Hello() {
}
}
func TestDetectIndentation(t *testing.T) {
tests := []struct {
name string
content string
want string
pos int
}{
{
name: "no indent",
content: "func main() {}",
pos: 0,
want: "",
},
{
name: "tab indent",
content: "func main() {\n\tprintln(\"hello\")\n}",
pos: 15,
want: "\t",
},
{
name: "space indent",
content: "func main() {\n println(\"hello\")\n}",
pos: 18,
want: " ",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := detectIndentation([]byte(tt.content), tt.pos)
if got != tt.want {
t.Errorf("detectIndentation() = %q, want %q", got, tt.want)
}
})
}
}
func TestGenerateDiff(t *testing.T) {
original := "line1\nline2\nline3"
modified := "line1\nmodified\nline3"
+172
View File
@@ -0,0 +1,172 @@
package edit
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"github.com/lukaszraczylo/mcp-filepuff/internal/parser"
)
// mkEngine builds an Engine with a registry that is closed at test end.
func mkEngine(t *testing.T) *Engine {
t.Helper()
reg := parser.NewRegistry()
t.Cleanup(reg.Close)
return NewEngine(reg)
}
// applyEditFile writes content to a temp file, applies the edit, and returns the on-disk bytes.
func applyEditFile(t *testing.T, name, content string, e *ASTEdit) string {
t.Helper()
dir := t.TempDir()
p := filepath.Join(dir, name)
if err := os.WriteFile(p, []byte(content), 0o600); err != nil {
t.Fatalf("write temp: %v", err)
}
e.File = p
res, err := mkEngine(t).Apply(context.Background(), e)
if err != nil {
t.Fatalf("apply: %v", err)
}
if !res.Success {
t.Fatalf("apply unsuccessful: %s", res.Error)
}
got, err := os.ReadFile(p)
if err != nil {
t.Fatalf("readback: %v", err)
}
return string(got)
}
// assertAllCRLF fails if s contains any bare LF (not preceded by CR) or a doubled CR.
func assertAllCRLF(t *testing.T, s string) {
t.Helper()
if strings.Count(s, "\n") != strings.Count(s, "\r\n") {
t.Fatalf("file contains bare LF (mixed line endings): %q", s)
}
if strings.Contains(s, "\r\r") {
t.Fatalf("file contains doubled CR: %q", s)
}
}
// ---- Cluster C: text-mode edits insert new_content verbatim (no auto-indentation) ----
func TestTextEditVerbatimNoAutoIndent(t *testing.T) {
// selector_text matches a token that sits at one tab of indentation, so the old
// code detected "\t" and re-indented continuation lines of new_content. Because
// new_content is itself already indented, that produced a DOUBLE tab on line 2.
src := "func f() {\n\tOLD\n}\n"
got := applyEditFile(t, "f.txt", src, &ASTEdit{
Operation: EditReplace,
NewContent: "A\n\tB", // line 2 already carries its own tab
Selector: ASTSelector{Text: "OLD"},
})
want := "func f() {\n\tA\n\tB\n}\n"
if got != want {
t.Fatalf("text edit must insert new_content verbatim (no auto-indent).\nwant: %q\ngot: %q", want, got)
}
}
// ---- Cluster B: edits preserve the file's original line-ending convention ----
func TestASTEditPreservesCRLF(t *testing.T) {
src := "package main\r\n\r\nfunc demo() {\r\n\tprintln(\"old\")\r\n}\r\n"
got := applyEditFile(t, "demo.go", src, &ASTEdit{
Operation: EditReplace,
NewContent: "func demo() {\n\treturn\n}",
Selector: ASTSelector{Kind: "function_declaration", Name: "demo"},
})
assertAllCRLF(t, got)
if !strings.Contains(got, "func demo() {\r\n\treturn\r\n}") {
t.Fatalf("replacement not normalized to CRLF: %q", got)
}
}
func TestTextEditReplacePreservesCRLF(t *testing.T) {
src := "alpha\r\nbravo\r\ncharlie\r\n"
got := applyEditFile(t, "f.txt", src, &ASTEdit{
Operation: EditReplace,
NewContent: "BRAVO1\nBRAVO2",
Selector: ASTSelector{AtLine: 2, LineEnd: 2},
})
assertAllCRLF(t, got)
want := "alpha\r\nBRAVO1\r\nBRAVO2\r\ncharlie\r\n"
if got != want {
t.Fatalf("CRLF text replace.\nwant: %q\ngot: %q", want, got)
}
}
func TestTextEditInsertAfterPreservesCRLF(t *testing.T) {
src := "alpha\r\nbravo\r\n"
got := applyEditFile(t, "f.txt", src, &ASTEdit{
Operation: EditInsertAfter,
NewContent: "INSERTED",
Selector: ASTSelector{AtLine: 1, LineEnd: 1},
})
assertAllCRLF(t, got)
want := "alpha\r\nINSERTED\r\nbravo\r\n"
if got != want {
t.Fatalf("CRLF insert_after.\nwant: %q\ngot: %q", want, got)
}
}
func TestLFFileStaysLFWhenNewContentHasCRLF(t *testing.T) {
src := "alpha\nbravo\ncharlie\n"
got := applyEditFile(t, "f.txt", src, &ASTEdit{
Operation: EditReplace,
NewContent: "B1\r\nB2", // stray CRLF in new_content must be normalized to the LF file
Selector: ASTSelector{AtLine: 2, LineEnd: 2},
})
if strings.Contains(got, "\r") {
t.Fatalf("LF file must not gain CR: %q", got)
}
want := "alpha\nB1\nB2\ncharlie\n"
if got != want {
t.Fatalf("LF normalization.\nwant: %q\ngot: %q", want, got)
}
}
// ---- Cluster D: diff rendering ----
func TestDiffMarksNoNewlineAtEOF(t *testing.T) {
dir := t.TempDir()
p := filepath.Join(dir, "f.txt")
if err := os.WriteFile(p, []byte("alpha\nbravo"), 0o600); err != nil { // no trailing newline
t.Fatalf("write: %v", err)
}
res, err := mkEngine(t).Preview(context.Background(), &ASTEdit{
File: p,
Operation: EditReplace,
NewContent: "BRAVO",
Selector: ASTSelector{AtLine: 2, LineEnd: 2},
})
if err != nil || !res.Success {
t.Fatalf("preview failed: %v %s", err, res.Error)
}
if !strings.Contains(res.Diff, "No newline at end of file") {
t.Fatalf("diff should mark missing final newline, got:\n%s", res.Diff)
}
}
func TestDiffHasNoRawCarriageReturn(t *testing.T) {
dir := t.TempDir()
p := filepath.Join(dir, "f.txt")
if err := os.WriteFile(p, []byte("alpha\r\nbravo\r\n"), 0o600); err != nil {
t.Fatalf("write: %v", err)
}
res, err := mkEngine(t).Preview(context.Background(), &ASTEdit{
File: p,
Operation: EditReplace,
NewContent: "BRAVO",
Selector: ASTSelector{AtLine: 2, LineEnd: 2},
})
if err != nil || !res.Success {
t.Fatalf("preview failed: %v %s", err, res.Error)
}
if strings.Contains(res.Diff, "\r") {
t.Fatalf("diff display must not contain raw CR, got:\n%q", res.Diff)
}
}
+78 -30
View File
@@ -81,22 +81,21 @@ func stripLicense(content string) (string, bool) {
}
}
// Python/hash-style leading comment block
// Python/hash-style leading comment block. Only contiguous "#" lines belong to the
// header; a blank line ends it and is preserved as a separator (rather than being
// greedily swallowed and collapsed away).
if strings.HasPrefix(trimmed, "#") {
lines := strings.Split(trimmed, "\n")
var commentLines []string
var rest []string
inComment := true
var commentLines, rest []string
for i, l := range lines {
if inComment && (strings.HasPrefix(l, "#") || strings.TrimSpace(l) == "") {
if strings.HasPrefix(l, "#") {
commentLines = append(commentLines, l)
} else {
rest = lines[i:]
break
continue
}
rest = lines[i:]
break
}
block := strings.Join(commentLines, "\n")
lower := strings.ToLower(block)
lower := strings.ToLower(strings.Join(commentLines, "\n"))
if strings.Contains(lower, "copyright") ||
strings.Contains(lower, "license") ||
strings.Contains(lower, "spdx-license-identifier") {
@@ -240,60 +239,109 @@ func stripBlockComments(content string, lang protocol.Language) (string, bool) {
return stripCStyleBlockComments(content)
}
// stripCStyleBlockComments removes /* ... */ from content.
// trimTrailingLineWhitespace drops trailing spaces/tabs from out (back to, but not past,
// the previous newline). Used when a standalone comment line is removed so its leading
// indentation does not linger as a whitespace-only line.
func trimTrailingLineWhitespace(out []byte) []byte {
for len(out) > 0 && (out[len(out)-1] == ' ' || out[len(out)-1] == '\t') {
out = out[:len(out)-1]
}
return out
}
// skipLineTail advances i over trailing spaces/tabs and a CR, then a single LF — i.e. the
// remainder of a line after a standalone comment's closer, including its \n or \r\n
// terminator. Returns the new index.
func skipLineTail(content string, i int) int {
for i < len(content) && (content[i] == ' ' || content[i] == '\t' || content[i] == '\r') {
i++
}
if i < len(content) && content[i] == '\n' {
i++
}
return i
}
// stripCStyleBlockComments removes /* ... */ comments. A comment that occupies a whole
// line (only whitespace before it) is removed together with that line's indentation and
// terminator; an inline comment (code precedes it) is removed in place, leaving the
// surrounding line — and crucially its terminator — intact so lines are never merged.
func stripCStyleBlockComments(content string) (string, bool) {
removed := false
var sb strings.Builder
out := make([]byte, 0, len(content))
lineHasNonSpace := false
i := 0
for i < len(content) {
if i+1 < len(content) && content[i] == '/' && content[i+1] == '*' {
// find closing */
end := strings.Index(content[i+2:], "*/")
if end >= 0 {
if end := strings.Index(content[i+2:], "*/"); end >= 0 {
removed = true
// advance past */
i = i + 2 + end + 2
// consume trailing newline
if i < len(content) && content[i] == '\n' {
i++
standalone := !lineHasNonSpace
i = i + 2 + end + 2 // advance past closing */
if standalone {
out = trimTrailingLineWhitespace(out)
i = skipLineTail(content, i)
lineHasNonSpace = false
}
continue
}
}
sb.WriteByte(content[i])
c := content[i]
switch c {
case '\n':
lineHasNonSpace = false
case ' ', '\t', '\r':
// whitespace: does not mark the line as having content
default:
lineHasNonSpace = true
}
out = append(out, c)
i++
}
if !removed {
return content, false
}
return sb.String(), true
return string(out), true
}
// stripPythonDocstrings removes triple-quoted strings (""" and ”').
// stripPythonDocstrings removes triple-quoted strings (""" and ”'). As with block
// comments, a standalone docstring line is removed along with its indentation and
// terminator, while an inline triple-quoted string leaves its line's terminator intact.
func stripPythonDocstrings(content string) (string, bool) {
removed := false
var sb strings.Builder
out := make([]byte, 0, len(content))
lineHasNonSpace := false
i := 0
for i < len(content) {
if i+2 < len(content) {
triple := content[i : i+3]
if triple == `"""` || triple == `'''` {
end := strings.Index(content[i+3:], triple)
if end >= 0 {
if end := strings.Index(content[i+3:], triple); end >= 0 {
removed = true
standalone := !lineHasNonSpace
i = i + 3 + end + 3
if i < len(content) && content[i] == '\n' {
i++
if standalone {
out = trimTrailingLineWhitespace(out)
i = skipLineTail(content, i)
lineHasNonSpace = false
}
continue
}
}
}
sb.WriteByte(content[i])
c := content[i]
switch c {
case '\n':
lineHasNonSpace = false
case ' ', '\t', '\r':
// whitespace: does not mark the line as having content
default:
lineHasNonSpace = true
}
out = append(out, c)
i++
}
if !removed {
return content, false
}
return sb.String(), true
return string(out), true
}
+47
View File
@@ -0,0 +1,47 @@
package parser
import (
"testing"
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
)
// An inline block comment (code before it on the same line) must not cause the following
// line to be merged onto it — the line's terminator must survive.
func TestStripBlockCommentInlineNoLineMerge(t *testing.T) {
got := StripContent("a := 1 /* note */\nb := 2\n", []StripFlag{StripBlockComments}, protocol.LangGo)
want := "a := 1 \nb := 2\n"
if got.Content != want {
t.Fatalf("inline block comment must not merge lines.\nwant: %q\ngot: %q", want, got.Content)
}
}
// A standalone block-comment line (only whitespace before it) is removed in full,
// including its indentation and terminator — no stray blank/whitespace line left behind.
func TestStripBlockCommentStandaloneRemovesLine(t *testing.T) {
got := StripContent("x\n\t/* c */\ny\n", []StripFlag{StripBlockComments}, protocol.LangGo)
want := "x\ny\n"
if got.Content != want {
t.Fatalf("standalone block comment line must be removed cleanly.\nwant: %q\ngot: %q", want, got.Content)
}
}
// On a CRLF file, removing a standalone block-comment line must consume the full \r\n
// terminator rather than leaving a stray blank (bare-CR) line.
func TestStripBlockCommentCRLFNoStrayBlank(t *testing.T) {
got := StripContent("code\r\n/* c */\r\nmore\r\n", []StripFlag{StripBlockComments}, protocol.LangGo)
want := "code\r\nmore\r\n"
if got.Content != want {
t.Fatalf("CRLF standalone block comment must not leave a stray blank line.\nwant: %q\ngot: %q", want, got.Content)
}
}
// Stripping a hash-style license header must not greedily swallow the blank separator
// line that follows it.
func TestStripLicensePythonPreservesSeparatorBlank(t *testing.T) {
got := StripContent("# Copyright 2024\n# License MIT\n\ncode\n", []StripFlag{StripLicense}, protocol.LangPython)
want := "\ncode\n"
if got.Content != want {
t.Fatalf("python license strip must keep the blank separator.\nwant: %q\ngot: %q", want, got.Content)
}
}
+18 -19
View File
@@ -11,16 +11,6 @@ import (
"github.com/mark3labs/mcp-go/mcp"
)
// unescapeNewlines converts literal \n, \t, \" sequences to actual characters.
// This handles cases where MCP clients send double-escaped JSON strings.
func unescapeNewlines(s string) string {
s = strings.ReplaceAll(s, "\\n", "\n")
s = strings.ReplaceAll(s, "\\t", "\t")
s = strings.ReplaceAll(s, "\\\"", "\"")
s = strings.ReplaceAll(s, "\\\\", "\\")
return s
}
// handleEditApply handles the edit_apply tool.
func (s *Server) handleEditApply(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
return s.handleEdit(ctx, request)
@@ -51,8 +41,12 @@ func (s *Server) handleEdit(ctx context.Context, request mcp.CallToolRequest) (*
return mcp.NewToolResultError("file is outside workspace root"), nil
}
// new_content arrives already fully decoded by the JSON-RPC layer (mcp-go):
// JSON escapes such as \n, \t, \\, \" have been resolved to their real bytes.
// It must therefore be used verbatim — any further unescaping would corrupt
// legitimate backslash sequences in source code (string literals, regexes,
// Windows paths). See TestEditApplyPreservesBackslashSequences*.
newContent := request.GetString("new_content", "")
newContent = unescapeNewlines(newContent)
selectorName := request.GetString("selector_name", "")
@@ -114,21 +108,26 @@ func (s *Server) handleEdit(ctx context.Context, request mcp.CallToolRequest) (*
}
}
// countDiffLines counts added (+) and removed (-) lines in a unified diff string.
// countDiffLines counts added (+) and removed (-) content lines in a unified diff.
// Only lines inside hunks are counted (everything after the first "@@" header), so the
// "---"/"+++" file headers are skipped structurally — and content whose own text starts
// with + or - is counted correctly rather than mistaken for a header. The git-style
// "\ No newline at end of file" marker is ignored.
func countDiffLines(diff string) (added, removed int) {
inHunk := false
for _, line := range strings.Split(diff, "\n") {
if len(line) == 0 {
if strings.HasPrefix(line, "@@") {
inHunk = true
continue
}
if !inHunk || line == "" {
continue
}
switch line[0] {
case '+':
if !strings.HasPrefix(line, "+++") {
added++
}
added++
case '-':
if !strings.HasPrefix(line, "---") {
removed++
}
removed++
}
}
return
+95 -39
View File
@@ -1,47 +1,103 @@
package server
import "testing"
import (
"context"
"os"
"strings"
"testing"
func TestUnescapeNewlines(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "no escapes",
input: "func test() {\n\treturn true;\n}",
expected: "func test() {\n\treturn true;\n}",
},
{
name: "literal backslash n",
input: "func test() {\\n\\treturn true;\\n}",
expected: "func test() {\n\treturn true;\n}",
},
{
name: "literal backslash t",
input: "func test() {\\n\\treturn true;\\n}",
expected: "func test() {\n\treturn true;\n}",
},
{
name: "literal quotes",
input: `func test() {\n\treturn \"true\";\n}`,
expected: "func test() {\n\treturn \"true\";\n}",
},
"github.com/mark3labs/mcp-go/mcp"
)
{
name: "mixed content",
input: "line1\\nline2\\tindented\\nline3",
expected: "line1\nline2\tindented\nline3",
},
// callEdit invokes handleEditApply with the given args and returns the result.
func callEdit(t *testing.T, srv *Server, args map[string]any) *mcp.CallToolResult {
t.Helper()
req := mcp.CallToolRequest{}
req.Params.Arguments = args
res, err := srv.handleEditApply(context.Background(), req)
if err != nil {
t.Fatalf("handleEditApply error: %v", err)
}
if res == nil {
t.Fatal("handleEditApply returned nil result")
}
return res
}
// TestEditApplyPreservesBackslashSequencesText is the primary regression guard for the
// double-unescape bug: new_content arrives from the JSON-RPC layer already fully decoded,
// so the handler must write it to disk verbatim. Literal backslash sequences (\n, \t, \",
// \\) that legitimately appear in source code must survive byte-for-byte.
func TestEditApplyPreservesBackslashSequencesText(t *testing.T) {
tmpDir := t.TempDir()
srv := newTestServer(t, tmpDir)
f := writeFile(t, tmpDir, "note.txt", "PLACEHOLDER\n")
// Exactly the bytes a client intends after JSON decoding: backslash sequences are
// real source text here, NOT escapes to be interpreted.
newContent := `printf("a\nb\tc"); re = \d+; path = "C:\\tmp"; q = \"`
res := callEdit(t, srv, map[string]any{
"file": f,
"operation": "replace",
"selector_text": "PLACEHOLDER",
"new_content": newContent,
"response": "none",
})
if res.IsError {
t.Fatalf("edit returned error: %+v", res.Content)
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := unescapeNewlines(tt.input)
if result != tt.expected {
t.Errorf("unescapeNewlines(%q) = %q, expected %q", tt.input, result, tt.expected)
}
})
got, err := os.ReadFile(f)
if err != nil {
t.Fatalf("read back: %v", err)
}
want := newContent + "\n"
if string(got) != want {
t.Fatalf("new_content not written verbatim (double-unescape regression).\nwant: %q\ngot: %q", want, string(got))
}
}
// TestCountDiffLinesHunkAware verifies the line counter skips file headers structurally and
// counts content lines whose own text begins with + or - (previously dropped as "+++"/"---").
func TestCountDiffLinesHunkAware(t *testing.T) {
diff := "--- a.txt\n+++ a.txt\n@@ -1,3 +1,3 @@\n context\n+++plusprefix\n+normal add\n---minusprefix\n-normal del\n"
added, removed := countDiffLines(diff)
if added != 2 || removed != 2 {
t.Fatalf("hunk-aware diff count wrong: added=%d removed=%d (want 2 and 2)", added, removed)
}
}
// TestEditApplyPreservesBackslashSequencesCode covers the AST/code path: the same verbatim
// guarantee must hold for syntactically-validated code files.
func TestEditApplyPreservesBackslashSequencesCode(t *testing.T) {
tmpDir := t.TempDir()
srv := newTestServer(t, tmpDir)
src := "package main\n\nfunc demo() {\n\tprintln(\"old\")\n}\n"
f := writeFile(t, tmpDir, "demo.go", src)
// Real tab indentation + literal backslash sequences inside string/raw-string literals.
newBody := "func demo() {\n\tprintln(\"a\\nb\\tc\")\n\t_ = `\\d+`\n\t_ = \"C:\\\\tmp\"\n}"
res := callEdit(t, srv, map[string]any{
"file": f,
"operation": "replace",
"selector_kind": "function_declaration",
"selector_name": "demo",
"new_content": newBody,
"response": "none",
})
if res.IsError {
t.Fatalf("edit returned error: %+v", res.Content)
}
got, err := os.ReadFile(f)
if err != nil {
t.Fatalf("read back: %v", err)
}
if !strings.Contains(string(got), newBody) {
t.Fatalf("backslash sequences corrupted in code edit.\nwant substring:\n%q\ngot file:\n%q", newBody, string(got))
}
}
+2
View File
@@ -137,6 +137,8 @@ const helpEditApply = "# edit_apply — flags and examples\n\n" +
"| `none` | Empty response (silent success) |\n\n" +
"`compact_response=true` is a deprecated alias for `response=\"count\"` kept for pre-v2 compatibility.\n\n" +
"For code files (Go, TypeScript, JavaScript, Python, C, C++, Rust) syntax is validated before writing — the edit is rejected if it would produce invalid syntax.\n\n" +
"## new_content handling\n\n" +
"`new_content` is inserted **verbatim**. Send it as normal JSON — real newlines (or JSON `\\n`), with quotes JSON-escaped as usual. The server does not re-interpret escape sequences, so literal backslash sequences in source (`\\n`, `\\t`, `\\\\`, regexes, Windows paths) are written exactly as given. Indentation is not auto-applied — include the leading whitespace you want. Line endings are normalized to the file's existing convention (LF or CRLF).\n\n" +
"## Selector types\n\n" +
"### AST-mode selectors (code files)\n" +
"- `selector_kind` — AST node type (e.g. `function_declaration`, `class_declaration`)\n" +
+1 -1
View File
@@ -313,7 +313,7 @@ func (s *Server) registerTools() {
mcp.Description("Edit operation: replace, insert_before, insert_after, delete"),
),
mcp.WithString("new_content",
mcp.Description("New content (required for replace/insert operations)"),
mcp.Description("New content (required for replace/insert operations). Inserted verbatim: send it as normal JSON (real newlines, quotes JSON-escaped as usual). The server does NOT re-interpret escape sequences, so backslash sequences in source code (e.g. \\n, \\t, \\\\ inside string literals, regexes, or Windows paths) are preserved exactly. Indentation is not auto-applied — include the leading whitespace you want. Line endings are normalized to the file's existing convention (LF or CRLF)."),
),
// AST-mode selectors (for code files)
mcp.WithString("selector_kind",