From 9af2801b1b5c9d0e31aee29ce09ad71d090c3f32 Mon Sep 17 00:00:00 2001 From: Lukasz Raczylo Date: Fri, 29 May 2026 00:17:36 +0100 Subject: [PATCH] refactor(edit): remove auto-indentation and add line-ending normalization - [x] Remove auto-indentation from text mode edits (caller controls whitespace) - [x] Add line-ending detection and normalization for both AST and text modes - [x] Share edit logic via new `spliceContent` function for both modes - [x] Fix diff to emit "No newline at end of file" markers - [x] Fix diff to strip raw CR from CRLF file output - [x] Remove double-unescape of backslash sequences in new_content - [x] Fix countDiffLines to be hunk-aware (correctly count lines starting with +/-) - [x] Fix block-comment stripping to remove standalone lines cleanly - [x] Fix Python license header stripping to preserve separator blank lines --- internal/edit/edit.go | 391 ++++++++++++------------- internal/edit/edit_test.go | 37 --- internal/edit/escaping_newline_test.go | 172 +++++++++++ internal/parser/strip.go | 108 +++++-- internal/parser/strip_newline_test.go | 47 +++ internal/server/handlers_edit.go | 37 ++- internal/server/handlers_edit_test.go | 134 ++++++--- internal/server/help_content.go | 2 + internal/server/server.go | 2 +- 9 files changed, 596 insertions(+), 334 deletions(-) create mode 100644 internal/edit/escaping_newline_test.go create mode 100644 internal/parser/strip_newline_test.go diff --git a/internal/edit/edit.go b/internal/edit/edit.go index 9844f4d..aabe663 100644 --- a/internal/edit/edit.go +++ b/internal/edit/edit.go @@ -481,98 +481,104 @@ func (e *Engine) matchesSelector(sel ASTSelector, n *sitter.Node, content []byte return true } -// applyEdit applies the edit operation to the content. -// AST mode uses exact byte positions — new_content is inserted verbatim without auto-indentation. +// applyEdit applies an AST-mode edit. new_content is inserted verbatim — the caller +// controls indentation — and only line endings are normalized to the file's convention. func (e *Engine) applyEdit(edit *ASTEdit, node *sitter.Node, content []byte) ([]byte, error) { - startByte := node.StartByte() - endByte := node.EndByte() + eol := detectLineEnding(content) + newContent := normalizeLineEndings(edit.NewContent, eol) + return spliceContent(edit.Operation, content, int(node.StartByte()), int(node.EndByte()), newContent, eol) +} - newContent := edit.NewContent +// detectLineEnding reports the file's dominant line-ending convention: "\r\n" when any +// CRLF terminator is present, otherwise "\n". +func detectLineEnding(content []byte) string { + if bytes.Contains(content, []byte("\r\n")) { + return "\r\n" + } + return "\n" +} + +// normalizeLineEndings rewrites every line ending in s to eol. It first collapses CRLF to +// LF, then expands to the target, so mixed input becomes uniform and new_content can never +// introduce a line ending foreign to the file being edited. +func normalizeLineEndings(s, eol string) string { + if s == "" { + return s + } + s = strings.ReplaceAll(s, "\r\n", "\n") + if eol != "\n" { + s = strings.ReplaceAll(s, "\n", eol) + } + return s +} + +func endsWithNewline(s string) bool { return strings.HasSuffix(s, "\n") } + +func startsWithNewline(s string) bool { + return s != "" && (s[0] == '\n' || s[0] == '\r') +} + +// spliceContent applies an edit operation by splicing newContent into content over the +// byte range [start, end). It is shared by AST and text modes — once auto-indentation is +// removed the two are identical. Restored terminators and separators use eol so the +// file's line-ending convention is preserved. +func spliceContent(op EditOperation, content []byte, start, end int, newContent, eol string) ([]byte, error) { + // A line-based selection on a CRLF file can land `end` between the \r (treated as + // line content) and the \n of a terminator. Pull it back so the full \r\n stays + // intact in the tail and is never split into a bare LF. + if end > start && end < len(content) && content[end-1] == '\r' && content[end] == '\n' { + end-- + } var result []byte - - switch edit.Operation { + switch op { case EditReplace: - result = append(result, content[:startByte]...) - result = append(result, []byte(newContent)...) - // Preserve trailing newline: if selection ended with \n but replacement doesn't, - // re-add it to prevent line merging - if endByte > startByte && content[endByte-1] == '\n' && !strings.HasSuffix(newContent, "\n") { - result = append(result, '\n') + result = append(result, content[:start]...) + result = append(result, newContent...) + // Restore a line terminator if the replaced range ended with one but the + // replacement does not, to prevent merging with the following line. + if end > start && content[end-1] == '\n' && !endsWithNewline(newContent) { + result = append(result, eol...) } - result = append(result, content[endByte:]...) + result = append(result, content[end:]...) case EditInsertBefore: insertion := newContent - if !strings.HasSuffix(insertion, "\n") { - insertion += "\n" + if !endsWithNewline(insertion) { + insertion += eol } - result = append(result, content[:startByte]...) - result = append(result, []byte(insertion)...) - result = append(result, content[startByte:]...) + result = append(result, content[:start]...) + result = append(result, insertion...) + result = append(result, content[start:]...) case EditInsertAfter: insertion := newContent - // Ensure separation from preceding content - if endByte > 0 && content[endByte-1] != '\n' && !strings.HasPrefix(insertion, "\n") { - insertion = "\n" + insertion + // Separate from preceding content. + if end > 0 && content[end-1] != '\n' && !startsWithNewline(insertion) { + insertion = eol + insertion } - // Ensure separation from following content - if !strings.HasSuffix(insertion, "\n") && endByte < uint32(len(content)) && content[endByte] != '\n' { - insertion += "\n" + // Separate from following content. + if !endsWithNewline(insertion) && end < len(content) && content[end] != '\n' { + insertion += eol } - result = append(result, content[:endByte]...) - result = append(result, []byte(insertion)...) - result = append(result, content[endByte:]...) + result = append(result, content[:end]...) + result = append(result, insertion...) + result = append(result, content[end:]...) case EditDelete: - result = append(result, content[:startByte]...) - result = append(result, content[endByte:]...) + result = append(result, content[:start]...) + result = append(result, content[end:]...) default: - return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", edit.Operation)) + return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", op)) } return result, nil } -// detectIndentation detects the indentation at a given byte position. -func detectIndentation(content []byte, bytePos int) string { - // Find the start of the line - lineStart := bytePos - for lineStart > 0 && content[lineStart-1] != '\n' { - lineStart-- - } - - // Extract leading whitespace - var indent strings.Builder - for i := lineStart; i < bytePos && i < len(content); i++ { - c := content[i] - if c == ' ' || c == '\t' { - indent.WriteByte(c) - } else { - break - } - } - - return indent.String() -} - -// indentContent applies indentation to multi-line content. -func indentContent(content string, indent string) string { - if indent == "" { - return content - } - - lines := strings.Split(content, "\n") - for i, line := range lines { - if i > 0 && line != "" { - lines[i] = indent + line - } - } - - return strings.Join(lines, "\n") -} +// noNewlineMarker is the git-style annotation emitted after a diff line whose source +// version has no trailing newline. +const noNewlineMarker = "\\ No newline at end of file\n" // diffLine represents a single line in the diff with its type and content. type diffLine struct { @@ -582,32 +588,56 @@ type diffLine struct { newN int // 1-based line number in modified (0 if delete) } +// indexRange is an inclusive [start, end] range of diffLine indices forming one hunk. +type indexRange struct{ start, end int } + // generateDiff creates a unified diff between original and modified content. -// Uses line-level Myers diff algorithm and outputs a proper unified diff -// with context lines (3 before/after each change, merging close hunks). +// Uses a line-level Myers diff and outputs a unified diff with 3 lines of context +// before/after each change, merging close hunks. func (e *Engine) generateDiff(original, modified, filename string) string { dmp := e.dmp - // Use line-level diffing: encode each line as a single character, - // diff the encoded strings, then decode back to real lines. + // Line-level diffing: encode each line as a single rune, diff the encoded strings, + // then decode back to real lines. chars1, chars2, lineArray := dmp.DiffLinesToChars(original, modified) - diffs := dmp.DiffMain(chars1, chars2, false) - diffs = dmp.DiffCharsToLines(diffs, lineArray) - - // Cleanup for readability + diffs := dmp.DiffCharsToLines(dmp.DiffMain(chars1, chars2, false), lineArray) diffs = dmp.DiffCleanupSemantic(diffs) - // Flatten diffs into individual lines with line numbers - var lines []diffLine - oldLine := 1 - newLine := 1 + // Track whether each version lacks a final newline, so the diff is annotated + // git-style ("\ No newline at end of file") instead of implying a phantom one. + origNoEOL := len(original) > 0 && !strings.HasSuffix(original, "\n") + modNoEOL := len(modified) > 0 && !strings.HasSuffix(modified, "\n") + + lines, maxOldN, maxNewN := flattenDiffLines(diffs) + + ranges := diffHunkRanges(lines) + if len(ranges) == 0 { + return "" // no changes + } + + var buf bytes.Buffer + fmt.Fprintf(&buf, "--- %s\n", filename) + fmt.Fprintf(&buf, "+++ %s\n", filename) + for _, r := range ranges { + oldStart, oldCount, newStart, newCount := hunkBounds(lines, r.start, r.end) + fmt.Fprintf(&buf, "@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount) + writeDiffBody(&buf, lines, r.start, r.end, origNoEOL, modNoEOL, maxOldN, maxNewN) + } + return buf.String() +} + +// flattenDiffLines expands diff segments into per-line records with 1-based line numbers, +// returning the lines plus the final line number of each version (for no-newline marking). +func flattenDiffLines(diffs []diffmatchpatch.Diff) (lines []diffLine, maxOldN, maxNewN int) { + oldLine, newLine := 1, 1 for _, d := range diffs { - rawLines := strings.SplitAfter(d.Text, "\n") - for _, raw := range rawLines { + for _, raw := range strings.SplitAfter(d.Text, "\n") { if raw == "" { continue } - text := strings.TrimSuffix(raw, "\n") + // Strip the terminator for display; also drop a trailing CR so CRLF files + // do not leak raw carriage returns into the rendered diff. + text := strings.TrimSuffix(strings.TrimSuffix(raw, "\n"), "\r") switch d.Type { case diffmatchpatch.DiffEqual: lines = append(lines, diffLine{op: d.Type, text: text, oldN: oldLine, newN: newLine}) @@ -622,97 +652,88 @@ func (e *Engine) generateDiff(original, modified, filename string) string { } } } + return lines, oldLine - 1, newLine - 1 +} - // Identify indices of changed lines +// diffHunkRanges returns the inclusive index ranges to emit: each changed line padded by +// 3 lines of context, with overlapping/adjacent ranges merged. +func diffHunkRanges(lines []diffLine) []indexRange { const contextSize = 3 - var changedIndices []int - for i, l := range lines { - if l.op != diffmatchpatch.DiffEqual { - changedIndices = append(changedIndices, i) - } - } - - if len(changedIndices) == 0 { - return "" // no changes - } - - // Build inclusion ranges: for each changed line, include contextSize lines before/after. - // Merge overlapping or adjacent ranges (gap <= 2*contextSize = 6 context lines). - type indexRange struct{ start, end int } // inclusive var ranges []indexRange - for _, ci := range changedIndices { - rStart := ci - contextSize - if rStart < 0 { - rStart = 0 - } - rEnd := ci + contextSize - if rEnd >= len(lines) { - rEnd = len(lines) - 1 + for i, l := range lines { + if l.op == diffmatchpatch.DiffEqual { + continue } + rStart := max(i-contextSize, 0) + rEnd := min(i+contextSize, len(lines)-1) if len(ranges) > 0 && rStart <= ranges[len(ranges)-1].end+1 { - // Merge with previous range - ranges[len(ranges)-1].end = rEnd + ranges[len(ranges)-1].end = rEnd // merge with previous } else { ranges = append(ranges, indexRange{rStart, rEnd}) } } + return ranges +} - // Emit unified diff - var buf bytes.Buffer - buf.WriteString(fmt.Sprintf("--- %s\n", filename)) - buf.WriteString(fmt.Sprintf("+++ %s\n", filename)) - - for _, r := range ranges { - // Determine hunk header line numbers - var oldStart, oldCount, newStart, newCount int - for i := r.start; i <= r.end; i++ { - l := lines[i] - switch l.op { - case diffmatchpatch.DiffEqual: - if oldCount == 0 { - oldStart = l.oldN - } - if newCount == 0 { - newStart = l.newN - } - oldCount++ - newCount++ - case diffmatchpatch.DiffDelete: - if oldCount == 0 { - oldStart = l.oldN - } - if newCount == 0 { - // Set newStart from context or next available - newStart = l.oldN // approximate - } - oldCount++ - case diffmatchpatch.DiffInsert: - if newCount == 0 { - newStart = l.newN - } - if oldCount == 0 { - oldStart = l.newN // approximate - } - newCount++ +// hunkBounds computes the unified-diff hunk header line numbers and counts for +// lines[start:end+1]. newStart/oldStart for one-sided lines are approximate. +func hunkBounds(lines []diffLine, start, end int) (oldStart, oldCount, newStart, newCount int) { + for i := start; i <= end; i++ { + l := lines[i] + switch l.op { + case diffmatchpatch.DiffEqual: + if oldCount == 0 { + oldStart = l.oldN } + if newCount == 0 { + newStart = l.newN + } + oldCount++ + newCount++ + case diffmatchpatch.DiffDelete: + if oldCount == 0 { + oldStart = l.oldN + } + if newCount == 0 { + newStart = l.oldN // approximate + } + oldCount++ + case diffmatchpatch.DiffInsert: + if newCount == 0 { + newStart = l.newN + } + if oldCount == 0 { + oldStart = l.newN // approximate + } + newCount++ } + } + return +} - buf.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount)) - - for i := r.start; i <= r.end; i++ { - l := lines[i] - switch l.op { - case diffmatchpatch.DiffEqual: - buf.WriteString(fmt.Sprintf(" %s\n", l.text)) - case diffmatchpatch.DiffDelete: - buf.WriteString(fmt.Sprintf("-%s\n", l.text)) - case diffmatchpatch.DiffInsert: - buf.WriteString(fmt.Sprintf("+%s\n", l.text)) +// writeDiffBody writes the space/-/+ body lines for one hunk, appending the git-style +// no-newline marker after the final line of any version that lacks a trailing newline. +func writeDiffBody(buf *bytes.Buffer, lines []diffLine, start, end int, origNoEOL, modNoEOL bool, maxOldN, maxNewN int) { + for i := start; i <= end; i++ { + l := lines[i] + switch l.op { + case diffmatchpatch.DiffEqual: + fmt.Fprintf(buf, " %s\n", l.text) + if (origNoEOL && l.oldN == maxOldN) || (modNoEOL && l.newN == maxNewN) { + buf.WriteString(noNewlineMarker) + } + case diffmatchpatch.DiffDelete: + fmt.Fprintf(buf, "-%s\n", l.text) + if origNoEOL && l.oldN == maxOldN { + buf.WriteString(noNewlineMarker) + } + case diffmatchpatch.DiffInsert: + fmt.Fprintf(buf, "+%s\n", l.text) + if modNoEOL && l.newN == maxNewN { + buf.WriteString(noNewlineMarker) } } } - - return buf.String() } // resolveTextSelector finds the byte range for a text-based selection. @@ -831,57 +852,11 @@ func (e *Engine) findLineRange(content []byte, lineStart, lineEnd int) (start, e return start, end, nil } -// applyTextEditOperation applies a text edit operation. +// applyTextEditOperation applies a text-mode edit. Like AST mode, new_content is inserted +// verbatim (no auto-indentation) with its line endings normalized to the file's convention. func (e *Engine) applyTextEditOperation(op EditOperation, content []byte, start, end int, newContent string) ([]byte, error) { - // Detect indentation at the selection point - indentation := detectIndentation(content, start) - indentedContent := indentContent(newContent, indentation) - - var result []byte - - switch op { - case EditReplace: - result = append(result, content[:start]...) - result = append(result, []byte(indentedContent)...) - // Preserve trailing newline: if selection ended with \n but replacement doesn't, - // re-add it to prevent line merging - if end > start && content[end-1] == '\n' && !strings.HasSuffix(indentedContent, "\n") { - result = append(result, '\n') - } - result = append(result, content[end:]...) - - case EditInsertBefore: - insertion := indentedContent - if !strings.HasSuffix(insertion, "\n") { - insertion += "\n" - } - result = append(result, content[:start]...) - result = append(result, []byte(insertion)...) - result = append(result, content[start:]...) - - case EditInsertAfter: - insertion := indentedContent - // Ensure separation from preceding content - if end > 0 && content[end-1] != '\n' && !strings.HasPrefix(insertion, "\n") { - insertion = "\n" + insertion - } - // Ensure separation from following content - if !strings.HasSuffix(insertion, "\n") && end < len(content) && content[end] != '\n' { - insertion += "\n" - } - result = append(result, content[:end]...) - result = append(result, []byte(insertion)...) - result = append(result, content[end:]...) - - case EditDelete: - result = append(result, content[:start]...) - result = append(result, content[end:]...) - - default: - return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", op)) - } - - return result, nil + eol := detectLineEnding(content) + return spliceContent(op, content, start, end, normalizeLineEndings(newContent, eol), eol) } // truncateString truncates a string to maxLen with ellipsis. diff --git a/internal/edit/edit_test.go b/internal/edit/edit_test.go index cd098a8..f7abcf1 100644 --- a/internal/edit/edit_test.go +++ b/internal/edit/edit_test.go @@ -353,43 +353,6 @@ func Hello() { } } -func TestDetectIndentation(t *testing.T) { - tests := []struct { - name string - content string - want string - pos int - }{ - { - name: "no indent", - content: "func main() {}", - pos: 0, - want: "", - }, - { - name: "tab indent", - content: "func main() {\n\tprintln(\"hello\")\n}", - pos: 15, - want: "\t", - }, - { - name: "space indent", - content: "func main() {\n println(\"hello\")\n}", - pos: 18, - want: " ", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := detectIndentation([]byte(tt.content), tt.pos) - if got != tt.want { - t.Errorf("detectIndentation() = %q, want %q", got, tt.want) - } - }) - } -} - func TestGenerateDiff(t *testing.T) { original := "line1\nline2\nline3" modified := "line1\nmodified\nline3" diff --git a/internal/edit/escaping_newline_test.go b/internal/edit/escaping_newline_test.go new file mode 100644 index 0000000..c81136d --- /dev/null +++ b/internal/edit/escaping_newline_test.go @@ -0,0 +1,172 @@ +package edit + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/lukaszraczylo/mcp-filepuff/internal/parser" +) + +// mkEngine builds an Engine with a registry that is closed at test end. +func mkEngine(t *testing.T) *Engine { + t.Helper() + reg := parser.NewRegistry() + t.Cleanup(reg.Close) + return NewEngine(reg) +} + +// applyEditFile writes content to a temp file, applies the edit, and returns the on-disk bytes. +func applyEditFile(t *testing.T, name, content string, e *ASTEdit) string { + t.Helper() + dir := t.TempDir() + p := filepath.Join(dir, name) + if err := os.WriteFile(p, []byte(content), 0o600); err != nil { + t.Fatalf("write temp: %v", err) + } + e.File = p + res, err := mkEngine(t).Apply(context.Background(), e) + if err != nil { + t.Fatalf("apply: %v", err) + } + if !res.Success { + t.Fatalf("apply unsuccessful: %s", res.Error) + } + got, err := os.ReadFile(p) + if err != nil { + t.Fatalf("readback: %v", err) + } + return string(got) +} + +// assertAllCRLF fails if s contains any bare LF (not preceded by CR) or a doubled CR. +func assertAllCRLF(t *testing.T, s string) { + t.Helper() + if strings.Count(s, "\n") != strings.Count(s, "\r\n") { + t.Fatalf("file contains bare LF (mixed line endings): %q", s) + } + if strings.Contains(s, "\r\r") { + t.Fatalf("file contains doubled CR: %q", s) + } +} + +// ---- Cluster C: text-mode edits insert new_content verbatim (no auto-indentation) ---- + +func TestTextEditVerbatimNoAutoIndent(t *testing.T) { + // selector_text matches a token that sits at one tab of indentation, so the old + // code detected "\t" and re-indented continuation lines of new_content. Because + // new_content is itself already indented, that produced a DOUBLE tab on line 2. + src := "func f() {\n\tOLD\n}\n" + got := applyEditFile(t, "f.txt", src, &ASTEdit{ + Operation: EditReplace, + NewContent: "A\n\tB", // line 2 already carries its own tab + Selector: ASTSelector{Text: "OLD"}, + }) + want := "func f() {\n\tA\n\tB\n}\n" + if got != want { + t.Fatalf("text edit must insert new_content verbatim (no auto-indent).\nwant: %q\ngot: %q", want, got) + } +} + +// ---- Cluster B: edits preserve the file's original line-ending convention ---- + +func TestASTEditPreservesCRLF(t *testing.T) { + src := "package main\r\n\r\nfunc demo() {\r\n\tprintln(\"old\")\r\n}\r\n" + got := applyEditFile(t, "demo.go", src, &ASTEdit{ + Operation: EditReplace, + NewContent: "func demo() {\n\treturn\n}", + Selector: ASTSelector{Kind: "function_declaration", Name: "demo"}, + }) + assertAllCRLF(t, got) + if !strings.Contains(got, "func demo() {\r\n\treturn\r\n}") { + t.Fatalf("replacement not normalized to CRLF: %q", got) + } +} + +func TestTextEditReplacePreservesCRLF(t *testing.T) { + src := "alpha\r\nbravo\r\ncharlie\r\n" + got := applyEditFile(t, "f.txt", src, &ASTEdit{ + Operation: EditReplace, + NewContent: "BRAVO1\nBRAVO2", + Selector: ASTSelector{AtLine: 2, LineEnd: 2}, + }) + assertAllCRLF(t, got) + want := "alpha\r\nBRAVO1\r\nBRAVO2\r\ncharlie\r\n" + if got != want { + t.Fatalf("CRLF text replace.\nwant: %q\ngot: %q", want, got) + } +} + +func TestTextEditInsertAfterPreservesCRLF(t *testing.T) { + src := "alpha\r\nbravo\r\n" + got := applyEditFile(t, "f.txt", src, &ASTEdit{ + Operation: EditInsertAfter, + NewContent: "INSERTED", + Selector: ASTSelector{AtLine: 1, LineEnd: 1}, + }) + assertAllCRLF(t, got) + want := "alpha\r\nINSERTED\r\nbravo\r\n" + if got != want { + t.Fatalf("CRLF insert_after.\nwant: %q\ngot: %q", want, got) + } +} + +func TestLFFileStaysLFWhenNewContentHasCRLF(t *testing.T) { + src := "alpha\nbravo\ncharlie\n" + got := applyEditFile(t, "f.txt", src, &ASTEdit{ + Operation: EditReplace, + NewContent: "B1\r\nB2", // stray CRLF in new_content must be normalized to the LF file + Selector: ASTSelector{AtLine: 2, LineEnd: 2}, + }) + if strings.Contains(got, "\r") { + t.Fatalf("LF file must not gain CR: %q", got) + } + want := "alpha\nB1\nB2\ncharlie\n" + if got != want { + t.Fatalf("LF normalization.\nwant: %q\ngot: %q", want, got) + } +} + +// ---- Cluster D: diff rendering ---- + +func TestDiffMarksNoNewlineAtEOF(t *testing.T) { + dir := t.TempDir() + p := filepath.Join(dir, "f.txt") + if err := os.WriteFile(p, []byte("alpha\nbravo"), 0o600); err != nil { // no trailing newline + t.Fatalf("write: %v", err) + } + res, err := mkEngine(t).Preview(context.Background(), &ASTEdit{ + File: p, + Operation: EditReplace, + NewContent: "BRAVO", + Selector: ASTSelector{AtLine: 2, LineEnd: 2}, + }) + if err != nil || !res.Success { + t.Fatalf("preview failed: %v %s", err, res.Error) + } + if !strings.Contains(res.Diff, "No newline at end of file") { + t.Fatalf("diff should mark missing final newline, got:\n%s", res.Diff) + } +} + +func TestDiffHasNoRawCarriageReturn(t *testing.T) { + dir := t.TempDir() + p := filepath.Join(dir, "f.txt") + if err := os.WriteFile(p, []byte("alpha\r\nbravo\r\n"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + res, err := mkEngine(t).Preview(context.Background(), &ASTEdit{ + File: p, + Operation: EditReplace, + NewContent: "BRAVO", + Selector: ASTSelector{AtLine: 2, LineEnd: 2}, + }) + if err != nil || !res.Success { + t.Fatalf("preview failed: %v %s", err, res.Error) + } + if strings.Contains(res.Diff, "\r") { + t.Fatalf("diff display must not contain raw CR, got:\n%q", res.Diff) + } +} diff --git a/internal/parser/strip.go b/internal/parser/strip.go index c4cc243..e427c05 100644 --- a/internal/parser/strip.go +++ b/internal/parser/strip.go @@ -81,22 +81,21 @@ func stripLicense(content string) (string, bool) { } } - // Python/hash-style leading comment block + // Python/hash-style leading comment block. Only contiguous "#" lines belong to the + // header; a blank line ends it and is preserved as a separator (rather than being + // greedily swallowed and collapsed away). if strings.HasPrefix(trimmed, "#") { lines := strings.Split(trimmed, "\n") - var commentLines []string - var rest []string - inComment := true + var commentLines, rest []string for i, l := range lines { - if inComment && (strings.HasPrefix(l, "#") || strings.TrimSpace(l) == "") { + if strings.HasPrefix(l, "#") { commentLines = append(commentLines, l) - } else { - rest = lines[i:] - break + continue } + rest = lines[i:] + break } - block := strings.Join(commentLines, "\n") - lower := strings.ToLower(block) + lower := strings.ToLower(strings.Join(commentLines, "\n")) if strings.Contains(lower, "copyright") || strings.Contains(lower, "license") || strings.Contains(lower, "spdx-license-identifier") { @@ -240,60 +239,109 @@ func stripBlockComments(content string, lang protocol.Language) (string, bool) { return stripCStyleBlockComments(content) } -// stripCStyleBlockComments removes /* ... */ from content. +// trimTrailingLineWhitespace drops trailing spaces/tabs from out (back to, but not past, +// the previous newline). Used when a standalone comment line is removed so its leading +// indentation does not linger as a whitespace-only line. +func trimTrailingLineWhitespace(out []byte) []byte { + for len(out) > 0 && (out[len(out)-1] == ' ' || out[len(out)-1] == '\t') { + out = out[:len(out)-1] + } + return out +} + +// skipLineTail advances i over trailing spaces/tabs and a CR, then a single LF — i.e. the +// remainder of a line after a standalone comment's closer, including its \n or \r\n +// terminator. Returns the new index. +func skipLineTail(content string, i int) int { + for i < len(content) && (content[i] == ' ' || content[i] == '\t' || content[i] == '\r') { + i++ + } + if i < len(content) && content[i] == '\n' { + i++ + } + return i +} + +// stripCStyleBlockComments removes /* ... */ comments. A comment that occupies a whole +// line (only whitespace before it) is removed together with that line's indentation and +// terminator; an inline comment (code precedes it) is removed in place, leaving the +// surrounding line — and crucially its terminator — intact so lines are never merged. func stripCStyleBlockComments(content string) (string, bool) { removed := false - var sb strings.Builder + out := make([]byte, 0, len(content)) + lineHasNonSpace := false i := 0 for i < len(content) { if i+1 < len(content) && content[i] == '/' && content[i+1] == '*' { - // find closing */ - end := strings.Index(content[i+2:], "*/") - if end >= 0 { + if end := strings.Index(content[i+2:], "*/"); end >= 0 { removed = true - // advance past */ - i = i + 2 + end + 2 - // consume trailing newline - if i < len(content) && content[i] == '\n' { - i++ + standalone := !lineHasNonSpace + i = i + 2 + end + 2 // advance past closing */ + if standalone { + out = trimTrailingLineWhitespace(out) + i = skipLineTail(content, i) + lineHasNonSpace = false } continue } } - sb.WriteByte(content[i]) + c := content[i] + switch c { + case '\n': + lineHasNonSpace = false + case ' ', '\t', '\r': + // whitespace: does not mark the line as having content + default: + lineHasNonSpace = true + } + out = append(out, c) i++ } if !removed { return content, false } - return sb.String(), true + return string(out), true } -// stripPythonDocstrings removes triple-quoted strings (""" and ”'). +// stripPythonDocstrings removes triple-quoted strings (""" and ”'). As with block +// comments, a standalone docstring line is removed along with its indentation and +// terminator, while an inline triple-quoted string leaves its line's terminator intact. func stripPythonDocstrings(content string) (string, bool) { removed := false - var sb strings.Builder + out := make([]byte, 0, len(content)) + lineHasNonSpace := false i := 0 for i < len(content) { if i+2 < len(content) { triple := content[i : i+3] if triple == `"""` || triple == `'''` { - end := strings.Index(content[i+3:], triple) - if end >= 0 { + if end := strings.Index(content[i+3:], triple); end >= 0 { removed = true + standalone := !lineHasNonSpace i = i + 3 + end + 3 - if i < len(content) && content[i] == '\n' { - i++ + if standalone { + out = trimTrailingLineWhitespace(out) + i = skipLineTail(content, i) + lineHasNonSpace = false } continue } } } - sb.WriteByte(content[i]) + c := content[i] + switch c { + case '\n': + lineHasNonSpace = false + case ' ', '\t', '\r': + // whitespace: does not mark the line as having content + default: + lineHasNonSpace = true + } + out = append(out, c) i++ } if !removed { return content, false } - return sb.String(), true + return string(out), true } diff --git a/internal/parser/strip_newline_test.go b/internal/parser/strip_newline_test.go new file mode 100644 index 0000000..e973a14 --- /dev/null +++ b/internal/parser/strip_newline_test.go @@ -0,0 +1,47 @@ +package parser + +import ( + "testing" + + "github.com/lukaszraczylo/mcp-filepuff/pkg/protocol" +) + +// An inline block comment (code before it on the same line) must not cause the following +// line to be merged onto it — the line's terminator must survive. +func TestStripBlockCommentInlineNoLineMerge(t *testing.T) { + got := StripContent("a := 1 /* note */\nb := 2\n", []StripFlag{StripBlockComments}, protocol.LangGo) + want := "a := 1 \nb := 2\n" + if got.Content != want { + t.Fatalf("inline block comment must not merge lines.\nwant: %q\ngot: %q", want, got.Content) + } +} + +// A standalone block-comment line (only whitespace before it) is removed in full, +// including its indentation and terminator — no stray blank/whitespace line left behind. +func TestStripBlockCommentStandaloneRemovesLine(t *testing.T) { + got := StripContent("x\n\t/* c */\ny\n", []StripFlag{StripBlockComments}, protocol.LangGo) + want := "x\ny\n" + if got.Content != want { + t.Fatalf("standalone block comment line must be removed cleanly.\nwant: %q\ngot: %q", want, got.Content) + } +} + +// On a CRLF file, removing a standalone block-comment line must consume the full \r\n +// terminator rather than leaving a stray blank (bare-CR) line. +func TestStripBlockCommentCRLFNoStrayBlank(t *testing.T) { + got := StripContent("code\r\n/* c */\r\nmore\r\n", []StripFlag{StripBlockComments}, protocol.LangGo) + want := "code\r\nmore\r\n" + if got.Content != want { + t.Fatalf("CRLF standalone block comment must not leave a stray blank line.\nwant: %q\ngot: %q", want, got.Content) + } +} + +// Stripping a hash-style license header must not greedily swallow the blank separator +// line that follows it. +func TestStripLicensePythonPreservesSeparatorBlank(t *testing.T) { + got := StripContent("# Copyright 2024\n# License MIT\n\ncode\n", []StripFlag{StripLicense}, protocol.LangPython) + want := "\ncode\n" + if got.Content != want { + t.Fatalf("python license strip must keep the blank separator.\nwant: %q\ngot: %q", want, got.Content) + } +} diff --git a/internal/server/handlers_edit.go b/internal/server/handlers_edit.go index 3148d63..a80d494 100644 --- a/internal/server/handlers_edit.go +++ b/internal/server/handlers_edit.go @@ -11,16 +11,6 @@ import ( "github.com/mark3labs/mcp-go/mcp" ) -// unescapeNewlines converts literal \n, \t, \" sequences to actual characters. -// This handles cases where MCP clients send double-escaped JSON strings. -func unescapeNewlines(s string) string { - s = strings.ReplaceAll(s, "\\n", "\n") - s = strings.ReplaceAll(s, "\\t", "\t") - s = strings.ReplaceAll(s, "\\\"", "\"") - s = strings.ReplaceAll(s, "\\\\", "\\") - return s -} - // handleEditApply handles the edit_apply tool. func (s *Server) handleEditApply(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { return s.handleEdit(ctx, request) @@ -51,8 +41,12 @@ func (s *Server) handleEdit(ctx context.Context, request mcp.CallToolRequest) (* return mcp.NewToolResultError("file is outside workspace root"), nil } + // new_content arrives already fully decoded by the JSON-RPC layer (mcp-go): + // JSON escapes such as \n, \t, \\, \" have been resolved to their real bytes. + // It must therefore be used verbatim — any further unescaping would corrupt + // legitimate backslash sequences in source code (string literals, regexes, + // Windows paths). See TestEditApplyPreservesBackslashSequences*. newContent := request.GetString("new_content", "") - newContent = unescapeNewlines(newContent) selectorName := request.GetString("selector_name", "") @@ -114,21 +108,26 @@ func (s *Server) handleEdit(ctx context.Context, request mcp.CallToolRequest) (* } } -// countDiffLines counts added (+) and removed (-) lines in a unified diff string. +// countDiffLines counts added (+) and removed (-) content lines in a unified diff. +// Only lines inside hunks are counted (everything after the first "@@" header), so the +// "---"/"+++" file headers are skipped structurally — and content whose own text starts +// with + or - is counted correctly rather than mistaken for a header. The git-style +// "\ No newline at end of file" marker is ignored. func countDiffLines(diff string) (added, removed int) { + inHunk := false for _, line := range strings.Split(diff, "\n") { - if len(line) == 0 { + if strings.HasPrefix(line, "@@") { + inHunk = true + continue + } + if !inHunk || line == "" { continue } switch line[0] { case '+': - if !strings.HasPrefix(line, "+++") { - added++ - } + added++ case '-': - if !strings.HasPrefix(line, "---") { - removed++ - } + removed++ } } return diff --git a/internal/server/handlers_edit_test.go b/internal/server/handlers_edit_test.go index 7787bd0..181aa05 100644 --- a/internal/server/handlers_edit_test.go +++ b/internal/server/handlers_edit_test.go @@ -1,47 +1,103 @@ package server -import "testing" +import ( + "context" + "os" + "strings" + "testing" -func TestUnescapeNewlines(t *testing.T) { - tests := []struct { - name string - input string - expected string - }{ - { - name: "no escapes", - input: "func test() {\n\treturn true;\n}", - expected: "func test() {\n\treturn true;\n}", - }, - { - name: "literal backslash n", - input: "func test() {\\n\\treturn true;\\n}", - expected: "func test() {\n\treturn true;\n}", - }, - { - name: "literal backslash t", - input: "func test() {\\n\\treturn true;\\n}", - expected: "func test() {\n\treturn true;\n}", - }, - { - name: "literal quotes", - input: `func test() {\n\treturn \"true\";\n}`, - expected: "func test() {\n\treturn \"true\";\n}", - }, + "github.com/mark3labs/mcp-go/mcp" +) - { - name: "mixed content", - input: "line1\\nline2\\tindented\\nline3", - expected: "line1\nline2\tindented\nline3", - }, +// callEdit invokes handleEditApply with the given args and returns the result. +func callEdit(t *testing.T, srv *Server, args map[string]any) *mcp.CallToolResult { + t.Helper() + req := mcp.CallToolRequest{} + req.Params.Arguments = args + res, err := srv.handleEditApply(context.Background(), req) + if err != nil { + t.Fatalf("handleEditApply error: %v", err) + } + if res == nil { + t.Fatal("handleEditApply returned nil result") + } + return res +} + +// TestEditApplyPreservesBackslashSequencesText is the primary regression guard for the +// double-unescape bug: new_content arrives from the JSON-RPC layer already fully decoded, +// so the handler must write it to disk verbatim. Literal backslash sequences (\n, \t, \", +// \\) that legitimately appear in source code must survive byte-for-byte. +func TestEditApplyPreservesBackslashSequencesText(t *testing.T) { + tmpDir := t.TempDir() + srv := newTestServer(t, tmpDir) + + f := writeFile(t, tmpDir, "note.txt", "PLACEHOLDER\n") + + // Exactly the bytes a client intends after JSON decoding: backslash sequences are + // real source text here, NOT escapes to be interpreted. + newContent := `printf("a\nb\tc"); re = \d+; path = "C:\\tmp"; q = \"` + + res := callEdit(t, srv, map[string]any{ + "file": f, + "operation": "replace", + "selector_text": "PLACEHOLDER", + "new_content": newContent, + "response": "none", + }) + if res.IsError { + t.Fatalf("edit returned error: %+v", res.Content) } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := unescapeNewlines(tt.input) - if result != tt.expected { - t.Errorf("unescapeNewlines(%q) = %q, expected %q", tt.input, result, tt.expected) - } - }) + got, err := os.ReadFile(f) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := newContent + "\n" + if string(got) != want { + t.Fatalf("new_content not written verbatim (double-unescape regression).\nwant: %q\ngot: %q", want, string(got)) + } +} + +// TestCountDiffLinesHunkAware verifies the line counter skips file headers structurally and +// counts content lines whose own text begins with + or - (previously dropped as "+++"/"---"). +func TestCountDiffLinesHunkAware(t *testing.T) { + diff := "--- a.txt\n+++ a.txt\n@@ -1,3 +1,3 @@\n context\n+++plusprefix\n+normal add\n---minusprefix\n-normal del\n" + added, removed := countDiffLines(diff) + if added != 2 || removed != 2 { + t.Fatalf("hunk-aware diff count wrong: added=%d removed=%d (want 2 and 2)", added, removed) + } +} + +// TestEditApplyPreservesBackslashSequencesCode covers the AST/code path: the same verbatim +// guarantee must hold for syntactically-validated code files. +func TestEditApplyPreservesBackslashSequencesCode(t *testing.T) { + tmpDir := t.TempDir() + srv := newTestServer(t, tmpDir) + + src := "package main\n\nfunc demo() {\n\tprintln(\"old\")\n}\n" + f := writeFile(t, tmpDir, "demo.go", src) + + // Real tab indentation + literal backslash sequences inside string/raw-string literals. + newBody := "func demo() {\n\tprintln(\"a\\nb\\tc\")\n\t_ = `\\d+`\n\t_ = \"C:\\\\tmp\"\n}" + + res := callEdit(t, srv, map[string]any{ + "file": f, + "operation": "replace", + "selector_kind": "function_declaration", + "selector_name": "demo", + "new_content": newBody, + "response": "none", + }) + if res.IsError { + t.Fatalf("edit returned error: %+v", res.Content) + } + + got, err := os.ReadFile(f) + if err != nil { + t.Fatalf("read back: %v", err) + } + if !strings.Contains(string(got), newBody) { + t.Fatalf("backslash sequences corrupted in code edit.\nwant substring:\n%q\ngot file:\n%q", newBody, string(got)) } } diff --git a/internal/server/help_content.go b/internal/server/help_content.go index f17f2fb..2c51175 100644 --- a/internal/server/help_content.go +++ b/internal/server/help_content.go @@ -137,6 +137,8 @@ const helpEditApply = "# edit_apply — flags and examples\n\n" + "| `none` | Empty response (silent success) |\n\n" + "`compact_response=true` is a deprecated alias for `response=\"count\"` kept for pre-v2 compatibility.\n\n" + "For code files (Go, TypeScript, JavaScript, Python, C, C++, Rust) syntax is validated before writing — the edit is rejected if it would produce invalid syntax.\n\n" + + "## new_content handling\n\n" + + "`new_content` is inserted **verbatim**. Send it as normal JSON — real newlines (or JSON `\\n`), with quotes JSON-escaped as usual. The server does not re-interpret escape sequences, so literal backslash sequences in source (`\\n`, `\\t`, `\\\\`, regexes, Windows paths) are written exactly as given. Indentation is not auto-applied — include the leading whitespace you want. Line endings are normalized to the file's existing convention (LF or CRLF).\n\n" + "## Selector types\n\n" + "### AST-mode selectors (code files)\n" + "- `selector_kind` — AST node type (e.g. `function_declaration`, `class_declaration`)\n" + diff --git a/internal/server/server.go b/internal/server/server.go index 0552f23..9c68624 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -313,7 +313,7 @@ func (s *Server) registerTools() { mcp.Description("Edit operation: replace, insert_before, insert_after, delete"), ), mcp.WithString("new_content", - mcp.Description("New content (required for replace/insert operations)"), + mcp.Description("New content (required for replace/insert operations). Inserted verbatim: send it as normal JSON (real newlines, quotes JSON-escaped as usual). The server does NOT re-interpret escape sequences, so backslash sequences in source code (e.g. \\n, \\t, \\\\ inside string literals, regexes, or Windows paths) are preserved exactly. Indentation is not auto-applied — include the leading whitespace you want. Line endings are normalized to the file's existing convention (LF or CRLF)."), ), // AST-mode selectors (for code files) mcp.WithString("selector_kind",