refactor(edit): remove auto-indentation and add line-ending normalization

- [x] Remove auto-indentation from text mode edits (caller controls whitespace) - [x] Add line-ending detection and normalization for both AST and text modes - [x] Share edit logic via new `spliceContent` function for both modes - [x] Fix diff to emit "No newline at end of file" markers - [x] Fix diff to strip raw CR from CRLF file output - [x] Remove double-unescape of backslash sequences in new_content - [x] Fix countDiffLines to be hunk-aware (correctly count lines starting with +/-) - [x] Fix block-comment stripping to remove standalone lines cleanly - [x] Fix Python license header stripping to preserve separator blank lines
2026-06-05 22:23:50 +00:00 · 2026-05-29 00:17:36 +01:00
parent f1643e7b81
commit 9af2801b1b
9 changed files with 596 additions and 334 deletions
@@ -481,98 +481,104 @@ func (e *Engine) matchesSelector(sel ASTSelector, n *sitter.Node, content []byte
 	return true
 }

-// applyEdit applies the edit operation to the content.
-// AST mode uses exact byte positions — new_content is inserted verbatim without auto-indentation.
+// applyEdit applies an AST-mode edit. new_content is inserted verbatim — the caller
+// controls indentation — and only line endings are normalized to the file's convention.
 func (e *Engine) applyEdit(edit *ASTEdit, node *sitter.Node, content []byte) ([]byte, error) {
-	startByte := node.StartByte()
-	endByte := node.EndByte()
+	eol := detectLineEnding(content)
+	newContent := normalizeLineEndings(edit.NewContent, eol)
+	return spliceContent(edit.Operation, content, int(node.StartByte()), int(node.EndByte()), newContent, eol)
+}

-	newContent := edit.NewContent
+// detectLineEnding reports the file's dominant line-ending convention: "\r\n" when any
+// CRLF terminator is present, otherwise "\n".
+func detectLineEnding(content []byte) string {
+	if bytes.Contains(content, []byte("\r\n")) {
+		return "\r\n"
+	}
+	return "\n"
+}
+
+// normalizeLineEndings rewrites every line ending in s to eol. It first collapses CRLF to
+// LF, then expands to the target, so mixed input becomes uniform and new_content can never
+// introduce a line ending foreign to the file being edited.
+func normalizeLineEndings(s, eol string) string {
+	if s == "" {
+		return s
+	}
+	s = strings.ReplaceAll(s, "\r\n", "\n")
+	if eol != "\n" {
+		s = strings.ReplaceAll(s, "\n", eol)
+	}
+	return s
+}
+
+func endsWithNewline(s string) bool { return strings.HasSuffix(s, "\n") }
+
+func startsWithNewline(s string) bool {
+	return s != "" && (s[0] == '\n' || s[0] == '\r')
+}
+
+// spliceContent applies an edit operation by splicing newContent into content over the
+// byte range [start, end). It is shared by AST and text modes — once auto-indentation is
+// removed the two are identical. Restored terminators and separators use eol so the
+// file's line-ending convention is preserved.
+func spliceContent(op EditOperation, content []byte, start, end int, newContent, eol string) ([]byte, error) {
+	// A line-based selection on a CRLF file can land `end` between the \r (treated as
+	// line content) and the \n of a terminator. Pull it back so the full \r\n stays
+	// intact in the tail and is never split into a bare LF.
+	if end > start && end < len(content) && content[end-1] == '\r' && content[end] == '\n' {
+		end--
+	}

 	var result []byte
-
-	switch edit.Operation {
+	switch op {
 	case EditReplace:
-		result = append(result, content[:startByte]...)
-		result = append(result, []byte(newContent)...)
-		// Preserve trailing newline: if selection ended with \n but replacement doesn't,
-		// re-add it to prevent line merging
-		if endByte > startByte && content[endByte-1] == '\n' && !strings.HasSuffix(newContent, "\n") {
-			result = append(result, '\n')
+		result = append(result, content[:start]...)
+		result = append(result, newContent...)
+		// Restore a line terminator if the replaced range ended with one but the
+		// replacement does not, to prevent merging with the following line.
+		if end > start && content[end-1] == '\n' && !endsWithNewline(newContent) {
+			result = append(result, eol...)
 		}
-		result = append(result, content[endByte:]...)
+		result = append(result, content[end:]...)

 	case EditInsertBefore:
 		insertion := newContent
-		if !strings.HasSuffix(insertion, "\n") {
-			insertion += "\n"
+		if !endsWithNewline(insertion) {
+			insertion += eol
 		}
-		result = append(result, content[:startByte]...)
-		result = append(result, []byte(insertion)...)
-		result = append(result, content[startByte:]...)
+		result = append(result, content[:start]...)
+		result = append(result, insertion...)
+		result = append(result, content[start:]...)

 	case EditInsertAfter:
 		insertion := newContent
-		// Ensure separation from preceding content
-		if endByte > 0 && content[endByte-1] != '\n' && !strings.HasPrefix(insertion, "\n") {
-			insertion = "\n" + insertion
+		// Separate from preceding content.
+		if end > 0 && content[end-1] != '\n' && !startsWithNewline(insertion) {
+			insertion = eol + insertion
 		}
-		// Ensure separation from following content
-		if !strings.HasSuffix(insertion, "\n") && endByte < uint32(len(content)) && content[endByte] != '\n' {
-			insertion += "\n"
+		// Separate from following content.
+		if !endsWithNewline(insertion) && end < len(content) && content[end] != '\n' {
+			insertion += eol
 		}
-		result = append(result, content[:endByte]...)
-		result = append(result, []byte(insertion)...)
-		result = append(result, content[endByte:]...)
+		result = append(result, content[:end]...)
+		result = append(result, insertion...)
+		result = append(result, content[end:]...)

 	case EditDelete:
-		result = append(result, content[:startByte]...)
-		result = append(result, content[endByte:]...)
+		result = append(result, content[:start]...)
+		result = append(result, content[end:]...)

 	default:
-		return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", edit.Operation))
+		return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", op))
 	}

 	return result, nil
 }

-// detectIndentation detects the indentation at a given byte position.
-func detectIndentation(content []byte, bytePos int) string {
-	// Find the start of the line
-	lineStart := bytePos
-	for lineStart > 0 && content[lineStart-1] != '\n' {
-		lineStart--
-	}
-
-	// Extract leading whitespace
-	var indent strings.Builder
-	for i := lineStart; i < bytePos && i < len(content); i++ {
-		c := content[i]
-		if c == ' ' || c == '\t' {
-			indent.WriteByte(c)
-		} else {
-			break
-		}
-	}
-
-	return indent.String()
-}
-
-// indentContent applies indentation to multi-line content.
-func indentContent(content string, indent string) string {
-	if indent == "" {
-		return content
-	}
-
-	lines := strings.Split(content, "\n")
-	for i, line := range lines {
-		if i > 0 && line != "" {
-			lines[i] = indent + line
-		}
-	}
-
-	return strings.Join(lines, "\n")
-}
+// noNewlineMarker is the git-style annotation emitted after a diff line whose source
+// version has no trailing newline.
+const noNewlineMarker = "\\ No newline at end of file\n"

 // diffLine represents a single line in the diff with its type and content.
 type diffLine struct {
@@ -582,32 +588,56 @@ type diffLine struct {
 	newN int    // 1-based line number in modified (0 if delete)
 }

+// indexRange is an inclusive [start, end] range of diffLine indices forming one hunk.
+type indexRange struct{ start, end int }
+
 // generateDiff creates a unified diff between original and modified content.
-// Uses line-level Myers diff algorithm and outputs a proper unified diff
-// with context lines (3 before/after each change, merging close hunks).
+// Uses a line-level Myers diff and outputs a unified diff with 3 lines of context
+// before/after each change, merging close hunks.
 func (e *Engine) generateDiff(original, modified, filename string) string {
 	dmp := e.dmp

-	// Use line-level diffing: encode each line as a single character,
-	// diff the encoded strings, then decode back to real lines.
+	// Line-level diffing: encode each line as a single rune, diff the encoded strings,
+	// then decode back to real lines.
 	chars1, chars2, lineArray := dmp.DiffLinesToChars(original, modified)
-	diffs := dmp.DiffMain(chars1, chars2, false)
-	diffs = dmp.DiffCharsToLines(diffs, lineArray)
-
-	// Cleanup for readability
+	diffs := dmp.DiffCharsToLines(dmp.DiffMain(chars1, chars2, false), lineArray)
 	diffs = dmp.DiffCleanupSemantic(diffs)

-	// Flatten diffs into individual lines with line numbers
-	var lines []diffLine
-	oldLine := 1
-	newLine := 1
+	// Track whether each version lacks a final newline, so the diff is annotated
+	// git-style ("\ No newline at end of file") instead of implying a phantom one.
+	origNoEOL := len(original) > 0 && !strings.HasSuffix(original, "\n")
+	modNoEOL := len(modified) > 0 && !strings.HasSuffix(modified, "\n")
+
+	lines, maxOldN, maxNewN := flattenDiffLines(diffs)
+
+	ranges := diffHunkRanges(lines)
+	if len(ranges) == 0 {
+		return "" // no changes
+	}
+
+	var buf bytes.Buffer
+	fmt.Fprintf(&buf, "--- %s\n", filename)
+	fmt.Fprintf(&buf, "+++ %s\n", filename)
+	for _, r := range ranges {
+		oldStart, oldCount, newStart, newCount := hunkBounds(lines, r.start, r.end)
+		fmt.Fprintf(&buf, "@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount)
+		writeDiffBody(&buf, lines, r.start, r.end, origNoEOL, modNoEOL, maxOldN, maxNewN)
+	}
+	return buf.String()
+}
+
+// flattenDiffLines expands diff segments into per-line records with 1-based line numbers,
+// returning the lines plus the final line number of each version (for no-newline marking).
+func flattenDiffLines(diffs []diffmatchpatch.Diff) (lines []diffLine, maxOldN, maxNewN int) {
+	oldLine, newLine := 1, 1
 	for _, d := range diffs {
-		rawLines := strings.SplitAfter(d.Text, "\n")
-		for _, raw := range rawLines {
+		for _, raw := range strings.SplitAfter(d.Text, "\n") {
 			if raw == "" {
 				continue
 			}
-			text := strings.TrimSuffix(raw, "\n")
+			// Strip the terminator for display; also drop a trailing CR so CRLF files
+			// do not leak raw carriage returns into the rendered diff.
+			text := strings.TrimSuffix(strings.TrimSuffix(raw, "\n"), "\r")
 			switch d.Type {
 			case diffmatchpatch.DiffEqual:
 				lines = append(lines, diffLine{op: d.Type, text: text, oldN: oldLine, newN: newLine})
@@ -622,97 +652,88 @@ func (e *Engine) generateDiff(original, modified, filename string) string {
 			}
 		}
 	}
+	return lines, oldLine - 1, newLine - 1
+}

-	// Identify indices of changed lines
+// diffHunkRanges returns the inclusive index ranges to emit: each changed line padded by
+// 3 lines of context, with overlapping/adjacent ranges merged.
+func diffHunkRanges(lines []diffLine) []indexRange {
 	const contextSize = 3
-	var changedIndices []int
-	for i, l := range lines {
-		if l.op != diffmatchpatch.DiffEqual {
-			changedIndices = append(changedIndices, i)
-		}
-	}
-
-	if len(changedIndices) == 0 {
-		return "" // no changes
-	}
-
-	// Build inclusion ranges: for each changed line, include contextSize lines before/after.
-	// Merge overlapping or adjacent ranges (gap <= 2*contextSize = 6 context lines).
-	type indexRange struct{ start, end int } // inclusive
 	var ranges []indexRange
-	for _, ci := range changedIndices {
-		rStart := ci - contextSize
-		if rStart < 0 {
-			rStart = 0
-		}
-		rEnd := ci + contextSize
-		if rEnd >= len(lines) {
-			rEnd = len(lines) - 1
+	for i, l := range lines {
+		if l.op == diffmatchpatch.DiffEqual {
+			continue
 		}
+		rStart := max(i-contextSize, 0)
+		rEnd := min(i+contextSize, len(lines)-1)
 		if len(ranges) > 0 && rStart <= ranges[len(ranges)-1].end+1 {
-			// Merge with previous range
-			ranges[len(ranges)-1].end = rEnd
+			ranges[len(ranges)-1].end = rEnd // merge with previous
 		} else {
 			ranges = append(ranges, indexRange{rStart, rEnd})
 		}
 	}
+	return ranges
+}

-	// Emit unified diff
-	var buf bytes.Buffer
-	buf.WriteString(fmt.Sprintf("--- %s\n", filename))
-	buf.WriteString(fmt.Sprintf("+++ %s\n", filename))
-
-	for _, r := range ranges {
-		// Determine hunk header line numbers
-		var oldStart, oldCount, newStart, newCount int
-		for i := r.start; i <= r.end; i++ {
-			l := lines[i]
-			switch l.op {
-			case diffmatchpatch.DiffEqual:
-				if oldCount == 0 {
-					oldStart = l.oldN
-				}
-				if newCount == 0 {
-					newStart = l.newN
-				}
-				oldCount++
-				newCount++
-			case diffmatchpatch.DiffDelete:
-				if oldCount == 0 {
-					oldStart = l.oldN
-				}
-				if newCount == 0 {
-					// Set newStart from context or next available
-					newStart = l.oldN // approximate
-				}
-				oldCount++
-			case diffmatchpatch.DiffInsert:
-				if newCount == 0 {
-					newStart = l.newN
-				}
-				if oldCount == 0 {
-					oldStart = l.newN // approximate
-				}
-				newCount++
+// hunkBounds computes the unified-diff hunk header line numbers and counts for
+// lines[start:end+1]. newStart/oldStart for one-sided lines are approximate.
+func hunkBounds(lines []diffLine, start, end int) (oldStart, oldCount, newStart, newCount int) {
+	for i := start; i <= end; i++ {
+		l := lines[i]
+		switch l.op {
+		case diffmatchpatch.DiffEqual:
+			if oldCount == 0 {
+				oldStart = l.oldN
 			}
+			if newCount == 0 {
+				newStart = l.newN
+			}
+			oldCount++
+			newCount++
+		case diffmatchpatch.DiffDelete:
+			if oldCount == 0 {
+				oldStart = l.oldN
+			}
+			if newCount == 0 {
+				newStart = l.oldN // approximate
+			}
+			oldCount++
+		case diffmatchpatch.DiffInsert:
+			if newCount == 0 {
+				newStart = l.newN
+			}
+			if oldCount == 0 {
+				oldStart = l.newN // approximate
+			}
+			newCount++
 		}
+	}
+	return
+}

-		buf.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount))
-
-		for i := r.start; i <= r.end; i++ {
-			l := lines[i]
-			switch l.op {
-			case diffmatchpatch.DiffEqual:
-				buf.WriteString(fmt.Sprintf(" %s\n", l.text))
-			case diffmatchpatch.DiffDelete:
-				buf.WriteString(fmt.Sprintf("-%s\n", l.text))
-			case diffmatchpatch.DiffInsert:
-				buf.WriteString(fmt.Sprintf("+%s\n", l.text))
+// writeDiffBody writes the space/-/+ body lines for one hunk, appending the git-style
+// no-newline marker after the final line of any version that lacks a trailing newline.
+func writeDiffBody(buf *bytes.Buffer, lines []diffLine, start, end int, origNoEOL, modNoEOL bool, maxOldN, maxNewN int) {
+	for i := start; i <= end; i++ {
+		l := lines[i]
+		switch l.op {
+		case diffmatchpatch.DiffEqual:
+			fmt.Fprintf(buf, " %s\n", l.text)
+			if (origNoEOL && l.oldN == maxOldN) || (modNoEOL && l.newN == maxNewN) {
+				buf.WriteString(noNewlineMarker)
+			}
+		case diffmatchpatch.DiffDelete:
+			fmt.Fprintf(buf, "-%s\n", l.text)
+			if origNoEOL && l.oldN == maxOldN {
+				buf.WriteString(noNewlineMarker)
+			}
+		case diffmatchpatch.DiffInsert:
+			fmt.Fprintf(buf, "+%s\n", l.text)
+			if modNoEOL && l.newN == maxNewN {
+				buf.WriteString(noNewlineMarker)
 			}
 		}
 	}
-
-	return buf.String()
 }

 // resolveTextSelector finds the byte range for a text-based selection.
@@ -831,57 +852,11 @@ func (e *Engine) findLineRange(content []byte, lineStart, lineEnd int) (start, e
 	return start, end, nil
 }

-// applyTextEditOperation applies a text edit operation.
+// applyTextEditOperation applies a text-mode edit. Like AST mode, new_content is inserted
+// verbatim (no auto-indentation) with its line endings normalized to the file's convention.
 func (e *Engine) applyTextEditOperation(op EditOperation, content []byte, start, end int, newContent string) ([]byte, error) {
-	// Detect indentation at the selection point
-	indentation := detectIndentation(content, start)
-	indentedContent := indentContent(newContent, indentation)
-
-	var result []byte
-
-	switch op {
-	case EditReplace:
-		result = append(result, content[:start]...)
-		result = append(result, []byte(indentedContent)...)
-		// Preserve trailing newline: if selection ended with \n but replacement doesn't,
-		// re-add it to prevent line merging
-		if end > start && content[end-1] == '\n' && !strings.HasSuffix(indentedContent, "\n") {
-			result = append(result, '\n')
-		}
-		result = append(result, content[end:]...)
-
-	case EditInsertBefore:
-		insertion := indentedContent
-		if !strings.HasSuffix(insertion, "\n") {
-			insertion += "\n"
-		}
-		result = append(result, content[:start]...)
-		result = append(result, []byte(insertion)...)
-		result = append(result, content[start:]...)
-
-	case EditInsertAfter:
-		insertion := indentedContent
-		// Ensure separation from preceding content
-		if end > 0 && content[end-1] != '\n' && !strings.HasPrefix(insertion, "\n") {
-			insertion = "\n" + insertion
-		}
-		// Ensure separation from following content
-		if !strings.HasSuffix(insertion, "\n") && end < len(content) && content[end] != '\n' {
-			insertion += "\n"
-		}
-		result = append(result, content[:end]...)
-		result = append(result, []byte(insertion)...)
-		result = append(result, content[end:]...)
-
-	case EditDelete:
-		result = append(result, content[:start]...)
-		result = append(result, content[end:]...)
-
-	default:
-		return nil, errors.NewInvalidEditError(fmt.Sprintf("unknown operation: %s", op))
-	}
-
-	return result, nil
+	eol := detectLineEnding(content)
+	return spliceContent(op, content, start, end, normalizeLineEndings(newContent, eol), eol)
 }

 // truncateString truncates a string to maxLen with ellipsis.
@@ -353,43 +353,6 @@ func Hello() {
 	}
 }

-func TestDetectIndentation(t *testing.T) {
-	tests := []struct {
-		name    string
-		content string
-		want    string
-		pos     int
-	}{
-		{
-			name:    "no indent",
-			content: "func main() {}",
-			pos:     0,
-			want:    "",
-		},
-		{
-			name:    "tab indent",
-			content: "func main() {\n\tprintln(\"hello\")\n}",
-			pos:     15,
-			want:    "\t",
-		},
-		{
-			name:    "space indent",
-			content: "func main() {\n    println(\"hello\")\n}",
-			pos:     18,
-			want:    "    ",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := detectIndentation([]byte(tt.content), tt.pos)
-			if got != tt.want {
-				t.Errorf("detectIndentation() = %q, want %q", got, tt.want)
-			}
-		})
-	}
-}
-
 func TestGenerateDiff(t *testing.T) {
 	original := "line1\nline2\nline3"
 	modified := "line1\nmodified\nline3"
@@ -0,0 +1,172 @@
+package edit
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/lukaszraczylo/mcp-filepuff/internal/parser"
+)
+
+// mkEngine builds an Engine with a registry that is closed at test end.
+func mkEngine(t *testing.T) *Engine {
+	t.Helper()
+	reg := parser.NewRegistry()
+	t.Cleanup(reg.Close)
+	return NewEngine(reg)
+}
+
+// applyEditFile writes content to a temp file, applies the edit, and returns the on-disk bytes.
+func applyEditFile(t *testing.T, name, content string, e *ASTEdit) string {
+	t.Helper()
+	dir := t.TempDir()
+	p := filepath.Join(dir, name)
+	if err := os.WriteFile(p, []byte(content), 0o600); err != nil {
+		t.Fatalf("write temp: %v", err)
+	}
+	e.File = p
+	res, err := mkEngine(t).Apply(context.Background(), e)
+	if err != nil {
+		t.Fatalf("apply: %v", err)
+	}
+	if !res.Success {
+		t.Fatalf("apply unsuccessful: %s", res.Error)
+	}
+	got, err := os.ReadFile(p)
+	if err != nil {
+		t.Fatalf("readback: %v", err)
+	}
+	return string(got)
+}
+
+// assertAllCRLF fails if s contains any bare LF (not preceded by CR) or a doubled CR.
+func assertAllCRLF(t *testing.T, s string) {
+	t.Helper()
+	if strings.Count(s, "\n") != strings.Count(s, "\r\n") {
+		t.Fatalf("file contains bare LF (mixed line endings): %q", s)
+	}
+	if strings.Contains(s, "\r\r") {
+		t.Fatalf("file contains doubled CR: %q", s)
+	}
+}
+
+// ---- Cluster C: text-mode edits insert new_content verbatim (no auto-indentation) ----
+
+func TestTextEditVerbatimNoAutoIndent(t *testing.T) {
+	// selector_text matches a token that sits at one tab of indentation, so the old
+	// code detected "\t" and re-indented continuation lines of new_content. Because
+	// new_content is itself already indented, that produced a DOUBLE tab on line 2.
+	src := "func f() {\n\tOLD\n}\n"
+	got := applyEditFile(t, "f.txt", src, &ASTEdit{
+		Operation:  EditReplace,
+		NewContent: "A\n\tB", // line 2 already carries its own tab
+		Selector:   ASTSelector{Text: "OLD"},
+	})
+	want := "func f() {\n\tA\n\tB\n}\n"
+	if got != want {
+		t.Fatalf("text edit must insert new_content verbatim (no auto-indent).\nwant: %q\ngot:  %q", want, got)
+	}
+}
+
+// ---- Cluster B: edits preserve the file's original line-ending convention ----
+
+func TestASTEditPreservesCRLF(t *testing.T) {
+	src := "package main\r\n\r\nfunc demo() {\r\n\tprintln(\"old\")\r\n}\r\n"
+	got := applyEditFile(t, "demo.go", src, &ASTEdit{
+		Operation:  EditReplace,
+		NewContent: "func demo() {\n\treturn\n}",
+		Selector:   ASTSelector{Kind: "function_declaration", Name: "demo"},
+	})
+	assertAllCRLF(t, got)
+	if !strings.Contains(got, "func demo() {\r\n\treturn\r\n}") {
+		t.Fatalf("replacement not normalized to CRLF: %q", got)
+	}
+}
+
+func TestTextEditReplacePreservesCRLF(t *testing.T) {
+	src := "alpha\r\nbravo\r\ncharlie\r\n"
+	got := applyEditFile(t, "f.txt", src, &ASTEdit{
+		Operation:  EditReplace,
+		NewContent: "BRAVO1\nBRAVO2",
+		Selector:   ASTSelector{AtLine: 2, LineEnd: 2},
+	})
+	assertAllCRLF(t, got)
+	want := "alpha\r\nBRAVO1\r\nBRAVO2\r\ncharlie\r\n"
+	if got != want {
+		t.Fatalf("CRLF text replace.\nwant: %q\ngot:  %q", want, got)
+	}
+}
+
+func TestTextEditInsertAfterPreservesCRLF(t *testing.T) {
+	src := "alpha\r\nbravo\r\n"
+	got := applyEditFile(t, "f.txt", src, &ASTEdit{
+		Operation:  EditInsertAfter,
+		NewContent: "INSERTED",
+		Selector:   ASTSelector{AtLine: 1, LineEnd: 1},
+	})
+	assertAllCRLF(t, got)
+	want := "alpha\r\nINSERTED\r\nbravo\r\n"
+	if got != want {
+		t.Fatalf("CRLF insert_after.\nwant: %q\ngot:  %q", want, got)
+	}
+}
+
+func TestLFFileStaysLFWhenNewContentHasCRLF(t *testing.T) {
+	src := "alpha\nbravo\ncharlie\n"
+	got := applyEditFile(t, "f.txt", src, &ASTEdit{
+		Operation:  EditReplace,
+		NewContent: "B1\r\nB2", // stray CRLF in new_content must be normalized to the LF file
+		Selector:   ASTSelector{AtLine: 2, LineEnd: 2},
+	})
+	if strings.Contains(got, "\r") {
+		t.Fatalf("LF file must not gain CR: %q", got)
+	}
+	want := "alpha\nB1\nB2\ncharlie\n"
+	if got != want {
+		t.Fatalf("LF normalization.\nwant: %q\ngot:  %q", want, got)
+	}
+}
+
+// ---- Cluster D: diff rendering ----
+
+func TestDiffMarksNoNewlineAtEOF(t *testing.T) {
+	dir := t.TempDir()
+	p := filepath.Join(dir, "f.txt")
+	if err := os.WriteFile(p, []byte("alpha\nbravo"), 0o600); err != nil { // no trailing newline
+		t.Fatalf("write: %v", err)
+	}
+	res, err := mkEngine(t).Preview(context.Background(), &ASTEdit{
+		File:       p,
+		Operation:  EditReplace,
+		NewContent: "BRAVO",
+		Selector:   ASTSelector{AtLine: 2, LineEnd: 2},
+	})
+	if err != nil || !res.Success {
+		t.Fatalf("preview failed: %v %s", err, res.Error)
+	}
+	if !strings.Contains(res.Diff, "No newline at end of file") {
+		t.Fatalf("diff should mark missing final newline, got:\n%s", res.Diff)
+	}
+}
+
+func TestDiffHasNoRawCarriageReturn(t *testing.T) {
+	dir := t.TempDir()
+	p := filepath.Join(dir, "f.txt")
+	if err := os.WriteFile(p, []byte("alpha\r\nbravo\r\n"), 0o600); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	res, err := mkEngine(t).Preview(context.Background(), &ASTEdit{
+		File:       p,
+		Operation:  EditReplace,
+		NewContent: "BRAVO",
+		Selector:   ASTSelector{AtLine: 2, LineEnd: 2},
+	})
+	if err != nil || !res.Success {
+		t.Fatalf("preview failed: %v %s", err, res.Error)
+	}
+	if strings.Contains(res.Diff, "\r") {
+		t.Fatalf("diff display must not contain raw CR, got:\n%q", res.Diff)
+	}
+}
@@ -81,22 +81,21 @@ func stripLicense(content string) (string, bool) {
 		}
 	}

-	// Python/hash-style leading comment block
+	// Python/hash-style leading comment block. Only contiguous "#" lines belong to the
+	// header; a blank line ends it and is preserved as a separator (rather than being
+	// greedily swallowed and collapsed away).
 	if strings.HasPrefix(trimmed, "#") {
 		lines := strings.Split(trimmed, "\n")
-		var commentLines []string
-		var rest []string
-		inComment := true
+		var commentLines, rest []string
 		for i, l := range lines {
-			if inComment && (strings.HasPrefix(l, "#") || strings.TrimSpace(l) == "") {
+			if strings.HasPrefix(l, "#") {
 				commentLines = append(commentLines, l)
-			} else {
-				rest = lines[i:]
-				break
+				continue
 			}
+			rest = lines[i:]
+			break
 		}
-		block := strings.Join(commentLines, "\n")
-		lower := strings.ToLower(block)
+		lower := strings.ToLower(strings.Join(commentLines, "\n"))
 		if strings.Contains(lower, "copyright") ||
 			strings.Contains(lower, "license") ||
 			strings.Contains(lower, "spdx-license-identifier") {
@@ -240,60 +239,109 @@ func stripBlockComments(content string, lang protocol.Language) (string, bool) {
 	return stripCStyleBlockComments(content)
 }

-// stripCStyleBlockComments removes /* ... */ from content.
+// trimTrailingLineWhitespace drops trailing spaces/tabs from out (back to, but not past,
+// the previous newline). Used when a standalone comment line is removed so its leading
+// indentation does not linger as a whitespace-only line.
+func trimTrailingLineWhitespace(out []byte) []byte {
+	for len(out) > 0 && (out[len(out)-1] == ' ' || out[len(out)-1] == '\t') {
+		out = out[:len(out)-1]
+	}
+	return out
+}
+
+// skipLineTail advances i over trailing spaces/tabs and a CR, then a single LF — i.e. the
+// remainder of a line after a standalone comment's closer, including its \n or \r\n
+// terminator. Returns the new index.
+func skipLineTail(content string, i int) int {
+	for i < len(content) && (content[i] == ' ' || content[i] == '\t' || content[i] == '\r') {
+		i++
+	}
+	if i < len(content) && content[i] == '\n' {
+		i++
+	}
+	return i
+}
+
+// stripCStyleBlockComments removes /* ... */ comments. A comment that occupies a whole
+// line (only whitespace before it) is removed together with that line's indentation and
+// terminator; an inline comment (code precedes it) is removed in place, leaving the
+// surrounding line — and crucially its terminator — intact so lines are never merged.
 func stripCStyleBlockComments(content string) (string, bool) {
 	removed := false
-	var sb strings.Builder
+	out := make([]byte, 0, len(content))
+	lineHasNonSpace := false
 	i := 0
 	for i < len(content) {
 		if i+1 < len(content) && content[i] == '/' && content[i+1] == '*' {
-			// find closing */
-			end := strings.Index(content[i+2:], "*/")
-			if end >= 0 {
+			if end := strings.Index(content[i+2:], "*/"); end >= 0 {
 				removed = true
-				// advance past */
-				i = i + 2 + end + 2
-				// consume trailing newline
-				if i < len(content) && content[i] == '\n' {
-					i++
+				standalone := !lineHasNonSpace
+				i = i + 2 + end + 2 // advance past closing */
+				if standalone {
+					out = trimTrailingLineWhitespace(out)
+					i = skipLineTail(content, i)
+					lineHasNonSpace = false
 				}
 				continue
 			}
 		}
-		sb.WriteByte(content[i])
+		c := content[i]
+		switch c {
+		case '\n':
+			lineHasNonSpace = false
+		case ' ', '\t', '\r':
+			// whitespace: does not mark the line as having content
+		default:
+			lineHasNonSpace = true
+		}
+		out = append(out, c)
 		i++
 	}
 	if !removed {
 		return content, false
 	}
-	return sb.String(), true
+	return string(out), true
 }

-// stripPythonDocstrings removes triple-quoted strings (""" and ”').
+// stripPythonDocstrings removes triple-quoted strings (""" and ”'). As with block
+// comments, a standalone docstring line is removed along with its indentation and
+// terminator, while an inline triple-quoted string leaves its line's terminator intact.
 func stripPythonDocstrings(content string) (string, bool) {
 	removed := false
-	var sb strings.Builder
+	out := make([]byte, 0, len(content))
+	lineHasNonSpace := false
 	i := 0
 	for i < len(content) {
 		if i+2 < len(content) {
 			triple := content[i : i+3]
 			if triple == `"""` || triple == `'''` {
-				end := strings.Index(content[i+3:], triple)
-				if end >= 0 {
+				if end := strings.Index(content[i+3:], triple); end >= 0 {
 					removed = true
+					standalone := !lineHasNonSpace
 					i = i + 3 + end + 3
-					if i < len(content) && content[i] == '\n' {
-						i++
+					if standalone {
+						out = trimTrailingLineWhitespace(out)
+						i = skipLineTail(content, i)
+						lineHasNonSpace = false
 					}
 					continue
 				}
 			}
 		}
-		sb.WriteByte(content[i])
+		c := content[i]
+		switch c {
+		case '\n':
+			lineHasNonSpace = false
+		case ' ', '\t', '\r':
+			// whitespace: does not mark the line as having content
+		default:
+			lineHasNonSpace = true
+		}
+		out = append(out, c)
 		i++
 	}
 	if !removed {
 		return content, false
 	}
-	return sb.String(), true
+	return string(out), true
 }
@@ -0,0 +1,47 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
+)
+
+// An inline block comment (code before it on the same line) must not cause the following
+// line to be merged onto it — the line's terminator must survive.
+func TestStripBlockCommentInlineNoLineMerge(t *testing.T) {
+	got := StripContent("a := 1 /* note */\nb := 2\n", []StripFlag{StripBlockComments}, protocol.LangGo)
+	want := "a := 1 \nb := 2\n"
+	if got.Content != want {
+		t.Fatalf("inline block comment must not merge lines.\nwant: %q\ngot:  %q", want, got.Content)
+	}
+}
+
+// A standalone block-comment line (only whitespace before it) is removed in full,
+// including its indentation and terminator — no stray blank/whitespace line left behind.
+func TestStripBlockCommentStandaloneRemovesLine(t *testing.T) {
+	got := StripContent("x\n\t/* c */\ny\n", []StripFlag{StripBlockComments}, protocol.LangGo)
+	want := "x\ny\n"
+	if got.Content != want {
+		t.Fatalf("standalone block comment line must be removed cleanly.\nwant: %q\ngot:  %q", want, got.Content)
+	}
+}
+
+// On a CRLF file, removing a standalone block-comment line must consume the full \r\n
+// terminator rather than leaving a stray blank (bare-CR) line.
+func TestStripBlockCommentCRLFNoStrayBlank(t *testing.T) {
+	got := StripContent("code\r\n/* c */\r\nmore\r\n", []StripFlag{StripBlockComments}, protocol.LangGo)
+	want := "code\r\nmore\r\n"
+	if got.Content != want {
+		t.Fatalf("CRLF standalone block comment must not leave a stray blank line.\nwant: %q\ngot:  %q", want, got.Content)
+	}
+}
+
+// Stripping a hash-style license header must not greedily swallow the blank separator
+// line that follows it.
+func TestStripLicensePythonPreservesSeparatorBlank(t *testing.T) {
+	got := StripContent("# Copyright 2024\n# License MIT\n\ncode\n", []StripFlag{StripLicense}, protocol.LangPython)
+	want := "\ncode\n"
+	if got.Content != want {
+		t.Fatalf("python license strip must keep the blank separator.\nwant: %q\ngot:  %q", want, got.Content)
+	}
+}
@@ -11,16 +11,6 @@ import (
 	"github.com/mark3labs/mcp-go/mcp"
 )

-// unescapeNewlines converts literal \n, \t, \" sequences to actual characters.
-// This handles cases where MCP clients send double-escaped JSON strings.
-func unescapeNewlines(s string) string {
-	s = strings.ReplaceAll(s, "\\n", "\n")
-	s = strings.ReplaceAll(s, "\\t", "\t")
-	s = strings.ReplaceAll(s, "\\\"", "\"")
-	s = strings.ReplaceAll(s, "\\\\", "\\")
-	return s
-}
-
 // handleEditApply handles the edit_apply tool.
 func (s *Server) handleEditApply(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
 	return s.handleEdit(ctx, request)
@@ -51,8 +41,12 @@ func (s *Server) handleEdit(ctx context.Context, request mcp.CallToolRequest) (*
 		return mcp.NewToolResultError("file is outside workspace root"), nil
 	}

+	// new_content arrives already fully decoded by the JSON-RPC layer (mcp-go):
+	// JSON escapes such as \n, \t, \\, \" have been resolved to their real bytes.
+	// It must therefore be used verbatim — any further unescaping would corrupt
+	// legitimate backslash sequences in source code (string literals, regexes,
+	// Windows paths). See TestEditApplyPreservesBackslashSequences*.
 	newContent := request.GetString("new_content", "")
-	newContent = unescapeNewlines(newContent)

 	selectorName := request.GetString("selector_name", "")

@@ -114,21 +108,26 @@ func (s *Server) handleEdit(ctx context.Context, request mcp.CallToolRequest) (*
 	}
 }

-// countDiffLines counts added (+) and removed (-) lines in a unified diff string.
+// countDiffLines counts added (+) and removed (-) content lines in a unified diff.
+// Only lines inside hunks are counted (everything after the first "@@" header), so the
+// "---"/"+++" file headers are skipped structurally — and content whose own text starts
+// with + or - is counted correctly rather than mistaken for a header. The git-style
+// "\ No newline at end of file" marker is ignored.
 func countDiffLines(diff string) (added, removed int) {
+	inHunk := false
 	for _, line := range strings.Split(diff, "\n") {
-		if len(line) == 0 {
+		if strings.HasPrefix(line, "@@") {
+			inHunk = true
+			continue
+		}
+		if !inHunk || line == "" {
 			continue
 		}
 		switch line[0] {
 		case '+':
-			if !strings.HasPrefix(line, "+++") {
-				added++
-			}
+			added++
 		case '-':
-			if !strings.HasPrefix(line, "---") {
-				removed++
-			}
+			removed++
 		}
 	}
 	return
@@ -1,47 +1,103 @@
 package server

-import "testing"
+import (
+	"context"
+	"os"
+	"strings"
+	"testing"

-func TestUnescapeNewlines(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    string
-		expected string
-	}{
-		{
-			name:     "no escapes",
-			input:    "func test() {\n\treturn true;\n}",
-			expected: "func test() {\n\treturn true;\n}",
-		},
-		{
-			name:     "literal backslash n",
-			input:    "func test() {\\n\\treturn true;\\n}",
-			expected: "func test() {\n\treturn true;\n}",
-		},
-		{
-			name:     "literal backslash t",
-			input:    "func test() {\\n\\treturn true;\\n}",
-			expected: "func test() {\n\treturn true;\n}",
-		},
-		{
-			name:     "literal quotes",
-			input:    `func test() {\n\treturn \"true\";\n}`,
-			expected: "func test() {\n\treturn \"true\";\n}",
-		},
+	"github.com/mark3labs/mcp-go/mcp"
+)

-		{
-			name:     "mixed content",
-			input:    "line1\\nline2\\tindented\\nline3",
-			expected: "line1\nline2\tindented\nline3",
-		},
+// callEdit invokes handleEditApply with the given args and returns the result.
+func callEdit(t *testing.T, srv *Server, args map[string]any) *mcp.CallToolResult {
+	t.Helper()
+	req := mcp.CallToolRequest{}
+	req.Params.Arguments = args
+	res, err := srv.handleEditApply(context.Background(), req)
+	if err != nil {
+		t.Fatalf("handleEditApply error: %v", err)
+	}
+	if res == nil {
+		t.Fatal("handleEditApply returned nil result")
+	}
+	return res
+}
+
+// TestEditApplyPreservesBackslashSequencesText is the primary regression guard for the
+// double-unescape bug: new_content arrives from the JSON-RPC layer already fully decoded,
+// so the handler must write it to disk verbatim. Literal backslash sequences (\n, \t, \",
+// \\) that legitimately appear in source code must survive byte-for-byte.
+func TestEditApplyPreservesBackslashSequencesText(t *testing.T) {
+	tmpDir := t.TempDir()
+	srv := newTestServer(t, tmpDir)
+
+	f := writeFile(t, tmpDir, "note.txt", "PLACEHOLDER\n")
+
+	// Exactly the bytes a client intends after JSON decoding: backslash sequences are
+	// real source text here, NOT escapes to be interpreted.
+	newContent := `printf("a\nb\tc"); re = \d+; path = "C:\\tmp"; q = \"`
+
+	res := callEdit(t, srv, map[string]any{
+		"file":          f,
+		"operation":     "replace",
+		"selector_text": "PLACEHOLDER",
+		"new_content":   newContent,
+		"response":      "none",
+	})
+	if res.IsError {
+		t.Fatalf("edit returned error: %+v", res.Content)
 	}

-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := unescapeNewlines(tt.input)
-			if result != tt.expected {
-				t.Errorf("unescapeNewlines(%q) = %q, expected %q", tt.input, result, tt.expected)
-			}
-		})
+	got, err := os.ReadFile(f)
+	if err != nil {
+		t.Fatalf("read back: %v", err)
+	}
+	want := newContent + "\n"
+	if string(got) != want {
+		t.Fatalf("new_content not written verbatim (double-unescape regression).\nwant: %q\ngot:  %q", want, string(got))
+	}
+}
+
+// TestCountDiffLinesHunkAware verifies the line counter skips file headers structurally and
+// counts content lines whose own text begins with + or - (previously dropped as "+++"/"---").
+func TestCountDiffLinesHunkAware(t *testing.T) {
+	diff := "--- a.txt\n+++ a.txt\n@@ -1,3 +1,3 @@\n context\n+++plusprefix\n+normal add\n---minusprefix\n-normal del\n"
+	added, removed := countDiffLines(diff)
+	if added != 2 || removed != 2 {
+		t.Fatalf("hunk-aware diff count wrong: added=%d removed=%d (want 2 and 2)", added, removed)
+	}
+}
+
+// TestEditApplyPreservesBackslashSequencesCode covers the AST/code path: the same verbatim
+// guarantee must hold for syntactically-validated code files.
+func TestEditApplyPreservesBackslashSequencesCode(t *testing.T) {
+	tmpDir := t.TempDir()
+	srv := newTestServer(t, tmpDir)
+
+	src := "package main\n\nfunc demo() {\n\tprintln(\"old\")\n}\n"
+	f := writeFile(t, tmpDir, "demo.go", src)
+
+	// Real tab indentation + literal backslash sequences inside string/raw-string literals.
+	newBody := "func demo() {\n\tprintln(\"a\\nb\\tc\")\n\t_ = `\\d+`\n\t_ = \"C:\\\\tmp\"\n}"
+
+	res := callEdit(t, srv, map[string]any{
+		"file":          f,
+		"operation":     "replace",
+		"selector_kind": "function_declaration",
+		"selector_name": "demo",
+		"new_content":   newBody,
+		"response":      "none",
+	})
+	if res.IsError {
+		t.Fatalf("edit returned error: %+v", res.Content)
+	}
+
+	got, err := os.ReadFile(f)
+	if err != nil {
+		t.Fatalf("read back: %v", err)
+	}
+	if !strings.Contains(string(got), newBody) {
+		t.Fatalf("backslash sequences corrupted in code edit.\nwant substring:\n%q\ngot file:\n%q", newBody, string(got))
 	}
 }
@@ -137,6 +137,8 @@ const helpEditApply = "# edit_apply — flags and examples\n\n" +
 	"| `none` | Empty response (silent success) |\n\n" +
 	"`compact_response=true` is a deprecated alias for `response=\"count\"` kept for pre-v2 compatibility.\n\n" +
 	"For code files (Go, TypeScript, JavaScript, Python, C, C++, Rust) syntax is validated before writing — the edit is rejected if it would produce invalid syntax.\n\n" +
+	"## new_content handling\n\n" +
+	"`new_content` is inserted **verbatim**. Send it as normal JSON — real newlines (or JSON `\\n`), with quotes JSON-escaped as usual. The server does not re-interpret escape sequences, so literal backslash sequences in source (`\\n`, `\\t`, `\\\\`, regexes, Windows paths) are written exactly as given. Indentation is not auto-applied — include the leading whitespace you want. Line endings are normalized to the file's existing convention (LF or CRLF).\n\n" +
 	"## Selector types\n\n" +
 	"### AST-mode selectors (code files)\n" +
 	"- `selector_kind` — AST node type (e.g. `function_declaration`, `class_declaration`)\n" +
@@ -313,7 +313,7 @@ func (s *Server) registerTools() {
 				mcp.Description("Edit operation: replace, insert_before, insert_after, delete"),
 			),
 			mcp.WithString("new_content",
-				mcp.Description("New content (required for replace/insert operations)"),
+				mcp.Description("New content (required for replace/insert operations). Inserted verbatim: send it as normal JSON (real newlines, quotes JSON-escaped as usual). The server does NOT re-interpret escape sequences, so backslash sequences in source code (e.g. \\n, \\t, \\\\ inside string literals, regexes, or Windows paths) are preserved exactly. Indentation is not auto-applied — include the leading whitespace you want. Line endings are normalized to the file's existing convention (LF or CRLF)."),
 			),
 			// AST-mode selectors (for code files)
 			mcp.WithString("selector_kind",