refactor(edit): remove auto-indentation and add line-ending normalization

- [x] Remove auto-indentation from text mode edits (caller controls whitespace)
- [x] Add line-ending detection and normalization for both AST and text modes
- [x] Share edit logic via new `spliceContent` function for both modes
- [x] Fix diff to emit "No newline at end of file" markers
- [x] Fix diff to strip raw CR from CRLF file output
- [x] Remove double-unescape of backslash sequences in new_content
- [x] Fix countDiffLines to be hunk-aware (correctly count lines starting with +/-)
- [x] Fix block-comment stripping to remove standalone lines cleanly
- [x] Fix Python license header stripping to preserve separator blank lines
This commit is contained in:
2026-05-29 00:17:36 +01:00
parent f1643e7b81
commit 9af2801b1b
9 changed files with 596 additions and 334 deletions
+78 -30
View File
@@ -81,22 +81,21 @@ func stripLicense(content string) (string, bool) {
}
}
// Python/hash-style leading comment block
// Python/hash-style leading comment block. Only contiguous "#" lines belong to the
// header; a blank line ends it and is preserved as a separator (rather than being
// greedily swallowed and collapsed away).
if strings.HasPrefix(trimmed, "#") {
lines := strings.Split(trimmed, "\n")
var commentLines []string
var rest []string
inComment := true
var commentLines, rest []string
for i, l := range lines {
if inComment && (strings.HasPrefix(l, "#") || strings.TrimSpace(l) == "") {
if strings.HasPrefix(l, "#") {
commentLines = append(commentLines, l)
} else {
rest = lines[i:]
break
continue
}
rest = lines[i:]
break
}
block := strings.Join(commentLines, "\n")
lower := strings.ToLower(block)
lower := strings.ToLower(strings.Join(commentLines, "\n"))
if strings.Contains(lower, "copyright") ||
strings.Contains(lower, "license") ||
strings.Contains(lower, "spdx-license-identifier") {
@@ -240,60 +239,109 @@ func stripBlockComments(content string, lang protocol.Language) (string, bool) {
return stripCStyleBlockComments(content)
}
// stripCStyleBlockComments removes /* ... */ from content.
// trimTrailingLineWhitespace returns out with every trailing space and tab byte cut
// off. Scanning stops at the first byte that is neither, so a preceding newline (or any
// other content) is never removed. Callers use it after dropping a standalone comment so
// the indentation already copied for that line does not remain as a whitespace-only line.
// The result shares out's backing array.
func trimTrailingLineWhitespace(out []byte) []byte {
	end := len(out)
	for end > 0 {
		if b := out[end-1]; b != ' ' && b != '\t' {
			break
		}
		end--
	}
	return out[:end]
}
// skipLineTail returns the index just past the remainder of a line that begins at i.
// It steps over any run of spaces, tabs and carriage returns and then over at most one
// line feed, so both "\n" and "\r\n" terminators are consumed. If a different byte is
// reached first, the index of that byte is returned and no line feed is consumed. An i
// at or beyond len(content) is returned unchanged.
func skipLineTail(content string, i int) int {
	n := len(content)
	for i < n {
		if c := content[i]; c != ' ' && c != '\t' && c != '\r' {
			break
		}
		i++
	}
	if i < n && content[i] == '\n' {
		return i + 1
	}
	return i
}
// stripCStyleBlockComments removes /* ... */ comments. A comment that occupies a whole
// line (only whitespace before it) is removed together with that line's indentation and
// terminator; an inline comment (code precedes it) is removed in place, leaving the
// surrounding line — and crucially its terminator — intact so lines are never merged.
// NOTE(review): this listing is a diff rendered without +/- markers — the sb-based lines
// appear to be the removed implementation and the out-based lines the added one; confirm
// against the commit before reading it as a single function body.
func stripCStyleBlockComments(content string) (string, bool) {
removed := false
var sb strings.Builder
out := make([]byte, 0, len(content))
// lineHasNonSpace records whether a byte other than space, tab or CR has been copied
// to out since the last '\n'.
lineHasNonSpace := false
i := 0
for i < len(content) {
if i+1 < len(content) && content[i] == '/' && content[i+1] == '*' {
// find closing */
end := strings.Index(content[i+2:], "*/")
if end >= 0 {
// An opener with no matching closer is not treated as a comment: control falls
// through and the bytes are copied like any other content.
if end := strings.Index(content[i+2:], "*/"); end >= 0 {
removed = true
// advance past */
i = i + 2 + end + 2
// consume trailing newline
if i < len(content) && content[i] == '\n' {
i++
// Decide before touching out: standalone means only whitespace precedes the
// comment on its line.
standalone := !lineHasNonSpace
i = i + 2 + end + 2 // advance past closing */
if standalone {
// Drop the indentation already copied for this line, then skip the rest of the
// line together with its terminator.
out = trimTrailingLineWhitespace(out)
i = skipLineTail(content, i)
lineHasNonSpace = false
}
continue
}
}
sb.WriteByte(content[i])
c := content[i]
switch c {
case '\n':
lineHasNonSpace = false
case ' ', '\t', '\r':
// whitespace: does not mark the line as having content
default:
lineHasNonSpace = true
}
out = append(out, c)
i++
}
// Nothing was stripped: hand back the input string itself rather than the copy.
if !removed {
return content, false
}
return sb.String(), true
return string(out), true
}
// stripPythonDocstrings removes triple-quoted strings (""" and ''').
// stripPythonDocstrings removes triple-quoted strings (""" and '''). As with block
// comments, a standalone docstring line is removed along with its indentation and
// terminator, while an inline triple-quoted string leaves its line's terminator intact.
// NOTE(review): this listing is a diff rendered without +/- markers — the sb-based lines
// appear to be the removed implementation and the out-based lines the added one; confirm
// against the commit before reading it as a single function body.
func stripPythonDocstrings(content string) (string, bool) {
removed := false
var sb strings.Builder
out := make([]byte, 0, len(content))
// lineHasNonSpace records whether a byte other than space, tab or CR has been copied
// to out since the last '\n'.
lineHasNonSpace := false
i := 0
for i < len(content) {
if i+2 < len(content) {
triple := content[i : i+3]
// Every triple-quoted span is removed, whether or not it is positioned as a
// docstring; the closer searched for uses the same quote character as the opener.
if triple == `"""` || triple == `'''` {
end := strings.Index(content[i+3:], triple)
if end >= 0 {
// An opener with no matching closer falls through and is copied verbatim.
if end := strings.Index(content[i+3:], triple); end >= 0 {
removed = true
// Decide before touching out: standalone means only whitespace precedes the
// opening quotes on their line.
standalone := !lineHasNonSpace
i = i + 3 + end + 3
if i < len(content) && content[i] == '\n' {
i++
if standalone {
// Drop the indentation already copied for this line, then skip the rest of the
// line together with its terminator.
out = trimTrailingLineWhitespace(out)
i = skipLineTail(content, i)
lineHasNonSpace = false
}
continue
}
}
}
sb.WriteByte(content[i])
c := content[i]
switch c {
case '\n':
lineHasNonSpace = false
case ' ', '\t', '\r':
// whitespace: does not mark the line as having content
default:
lineHasNonSpace = true
}
out = append(out, c)
i++
}
// Nothing was stripped: hand back the input string itself rather than the copy.
if !removed {
return content, false
}
return sb.String(), true
return string(out), true
}
+47
View File
@@ -0,0 +1,47 @@
package parser
import (
"testing"
"github.com/lukaszraczylo/mcp-filepuff/pkg/protocol"
)
// TestStripBlockCommentInlineNoLineMerge checks that stripping a block comment which
// follows code on the same line keeps that line's "\n", so the next line is not joined
// onto it.
func TestStripBlockCommentInlineNoLineMerge(t *testing.T) {
	const (
		input    = "a := 1 /* note */\nb := 2\n"
		expected = "a := 1 \nb := 2\n"
	)
	result := StripContent(input, []StripFlag{StripBlockComments}, protocol.LangGo)
	if result.Content == expected {
		return
	}
	t.Fatalf("inline block comment must not merge lines.\nwant: %q\ngot: %q", expected, result.Content)
}
// TestStripBlockCommentStandaloneRemovesLine checks that a line holding nothing but
// indentation and a block comment disappears entirely — indentation and terminator
// included — rather than leaving a blank or whitespace-only line.
func TestStripBlockCommentStandaloneRemovesLine(t *testing.T) {
	const (
		input    = "x\n\t/* c */\ny\n"
		expected = "x\ny\n"
	)
	result := StripContent(input, []StripFlag{StripBlockComments}, protocol.LangGo)
	if result.Content == expected {
		return
	}
	t.Fatalf("standalone block comment line must be removed cleanly.\nwant: %q\ngot: %q", expected, result.Content)
}
// TestStripBlockCommentCRLFNoStrayBlank checks that, for "\r\n"-terminated input, a
// standalone block-comment line is removed with its whole "\r\n" terminator, so no line
// containing only a bare CR is left behind.
func TestStripBlockCommentCRLFNoStrayBlank(t *testing.T) {
	const (
		input    = "code\r\n/* c */\r\nmore\r\n"
		expected = "code\r\nmore\r\n"
	)
	result := StripContent(input, []StripFlag{StripBlockComments}, protocol.LangGo)
	if result.Content == expected {
		return
	}
	t.Fatalf("CRLF standalone block comment must not leave a stray blank line.\nwant: %q\ngot: %q", expected, result.Content)
}
// TestStripLicensePythonPreservesSeparatorBlank checks that removing a "#"-comment
// license header leaves the blank line that separated it from the code in place.
func TestStripLicensePythonPreservesSeparatorBlank(t *testing.T) {
	const (
		input    = "# Copyright 2024\n# License MIT\n\ncode\n"
		expected = "\ncode\n"
	)
	result := StripContent(input, []StripFlag{StripLicense}, protocol.LangPython)
	if result.Content == expected {
		return
	}
	t.Fatalf("python license strip must keep the blank separator.\nwant: %q\ngot: %q", expected, result.Content)
}