Fixes FilesChanged calculations to count unique files (#2)

Git Level (per commit):
    - Track unique file paths in FilesModified slice
    - FilesChanged = count of unique files in THIS commit

  Aggregator Level (per contributor):
    - Collect all file paths from all commits into a SET
    - FilesChanged = size of the unique file set

  Result:
    - Contributor.FilesChanged = count of UNIQUE files they touched
    - Repository contributor = unique files in THAT repo only
This commit is contained in:
2025-12-19 10:44:00 +00:00
committed by GitHub
parent aedcf87338
commit 3bd9807e50
8 changed files with 420 additions and 57 deletions
+108 -2
View File
@@ -5,19 +5,22 @@ import (
)
// IsCommentLine checks if a line is a code comment (should not count as meaningful contribution)
// Note: Empty/whitespace lines are NOT comments - use IsWhitespaceLine for those.
func IsCommentLine(line string) bool {
trimmed := strings.TrimSpace(line)
if trimmed == "" {
return true // Empty lines don't count
return false // Empty lines are whitespace, not comments
}
// Common comment patterns across languages
// Order matters for overlapping prefixes (e.g., "///" before "//")
commentPrefixes := []string{
"///", // Rust/Swift/C# doc comments
"//", // C, C++, Java, Go, JS, TS, Swift, Kotlin, etc.
"#", // Python, Ruby, Shell, YAML, Perl, etc.
"/**", // JSDoc/JavaDoc block start
"/*", // C-style block comment start
"*/", // C-style block comment end
"*", // C-style block comment continuation
"<!--", // HTML/XML comment
"-->", // HTML/XML comment end
"--", // SQL, Lua, Haskell
@@ -33,6 +36,19 @@ func IsCommentLine(line string) bool {
}
}
// C-style block comment continuation: line starts with * followed by space or end of line
// This avoids false positives like "*ptr = value" (pointer dereference)
if strings.HasPrefix(trimmed, "*") {
if len(trimmed) == 1 {
return true // Just "*" alone
}
// Must be followed by whitespace or common comment characters, not alphanumeric
nextChar := trimmed[1]
if nextChar == ' ' || nextChar == '\t' || nextChar == '/' {
return true
}
}
return false
}
@@ -64,6 +80,96 @@ func IsMeaningfulLine(line string) bool {
return !IsWhitespaceLine(line) && !IsCommentLine(line)
}
// IsDocCommentLine checks if a line is a documentation comment (JSDoc, JavaDoc, Rust doc, etc.)
// These are comments specifically meant to document code, as opposed to regular comments.
func IsDocCommentLine(line string) bool {
trimmed := strings.TrimSpace(line)
if trimmed == "" {
return false
}
// Documentation comment patterns
docPrefixes := []string{
"///", // Rust, Swift, C# doc comments
"//!", // Rust inner doc comments
"/**", // JSDoc, JavaDoc block start
"\"\"\"", // Python docstring
"'''", // Python docstring
}
for _, prefix := range docPrefixes {
if strings.HasPrefix(trimmed, prefix) {
return true
}
}
// JSDoc/JavaDoc continuation lines with annotations (@param, @return, etc.)
if strings.HasPrefix(trimmed, "* @") || strings.HasPrefix(trimmed, "* @") {
return true
}
// Check for common doc annotations at the start of a comment
if strings.HasPrefix(trimmed, "// @") || strings.HasPrefix(trimmed, "# @") {
return true
}
return false
}
// IsCommentedOutCode attempts to detect if a comment line contains commented-out code
// rather than an actual comment. This is a heuristic and may have false positives/negatives.
func IsCommentedOutCode(line string) bool {
trimmed := strings.TrimSpace(line)
if trimmed == "" {
return false
}
// Remove comment prefix to get the content
var content string
commentPrefixes := []string{"///", "//", "#", "/*", "--", ";"}
for _, prefix := range commentPrefixes {
if strings.HasPrefix(trimmed, prefix) {
content = strings.TrimSpace(trimmed[len(prefix):])
break
}
}
if content == "" {
return false
}
// Heuristics for detecting commented-out code:
// 1. Ends with common code patterns
codeEndings := []string{";", "{", "}", ")", ",", ":", "=>", "->"}
for _, ending := range codeEndings {
if strings.HasSuffix(content, ending) {
return true
}
}
// 2. Starts with common code keywords
codeKeywords := []string{
"if ", "else ", "for ", "while ", "switch ", "case ", "return ", "break", "continue",
"const ", "let ", "var ", "func ", "function ", "def ", "class ", "struct ", "type ",
"import ", "from ", "package ", "public ", "private ", "protected ", "static ",
"async ", "await ", "try ", "catch ", "throw ", "raise ",
}
contentLower := strings.ToLower(content)
for _, keyword := range codeKeywords {
if strings.HasPrefix(contentLower, keyword) {
return true
}
}
// 3. Contains assignment operators
if strings.Contains(content, " = ") || strings.Contains(content, " := ") ||
strings.Contains(content, " == ") || strings.Contains(content, " != ") {
return true
}
return false
}
// IsRenameOrMove checks if a file change represents a rename or move operation
// rather than actual content modification. A rename/move is detected when both
// the source (fromName) and destination (toName) paths exist and differ.
+180 -5
View File
@@ -12,11 +12,11 @@ func TestIsCommentLine(t *testing.T) {
line string
expected bool
}{
// Empty and whitespace
{"empty string", "", true},
{"whitespace only", " ", true},
{"tab only", "\t", true},
{"mixed whitespace", " \t ", true},
// Empty and whitespace - NOT comments (use IsWhitespaceLine instead)
{"empty string", "", false},
{"whitespace only", " ", false},
{"tab only", "\t", false},
{"mixed whitespace", " \t ", false},
// C-style comments (Go, Java, JS, C++, etc.)
{"C single line comment", "// this is a comment", true},
@@ -25,6 +25,18 @@ func TestIsCommentLine(t *testing.T) {
{"C block end", "*/", true},
{"C block continuation", "* continuation", true},
{"C block continuation with space", " * continuation", true},
{"just asterisk", "*", true},
{"asterisk with slash", "*/", true},
// Pointer dereferences - NOT comments
{"pointer dereference", "*ptr = value", false},
{"pointer in expression", "*foo.bar", false},
{"multiplication", "*result", false},
// Doc comments
{"Rust doc comment", "/// This documents the function", true},
{"Rust inner doc", "//! Module documentation", true},
{"JSDoc start", "/** @param x the value */", true},
// Python/Shell comments
{"Python comment", "# python comment", true},
@@ -61,6 +73,70 @@ func TestIsCommentLine(t *testing.T) {
{"Function call", "fmt.Println(x)", false},
{"String with slash", `"http://example.com"`, false},
{"Code after whitespace", " x := 5", false},
// Indented code (common in diffs) - NOT comments
{"tab indented code", "\tfunc main() {", false},
{"space indented code", " if x > 0 {", false},
{"deeply indented", "\t\t\t\treturn nil", false},
{"mixed indentation", " \t for i := range items {", false},
{"indented closing brace", "\t}", false},
{"indented method call", " obj.Method()", false},
// TypeScript/JavaScript specific - NOT comments
{"TS interface", "interface User {", false},
{"TS type alias", "type Handler = () => void;", false},
{"TS arrow function", "const fn = () => {", false},
{"TS arrow function with type", "const fn = (x: number): string => {", false},
{"JS const", "const x = 5;", false},
{"JS let", "let counter = 0;", false},
{"JS async", "async function fetch() {", false},
{"JS await", "const result = await fetch(url);", false},
{"JS template literal", "const msg = `Hello ${name}`;", false},
{"JS export", "export default Component;", false},
{"JS import", "import { useState } from 'react';", false},
{"TS generic", "function identity<T>(arg: T): T {", false},
{"React JSX", "<Component prop={value} />", false},
{"JSX with children", "<div className=\"container\">", false},
// TypeScript/JavaScript comments
{"TS comment", "// TypeScript comment", true},
{"JSDoc block", "/** @type {string} */", true},
{"TSDoc", "/** @param name - the user name */", true},
// Go specific - NOT comments
{"Go struct", "type User struct {", false},
{"Go interface def", "type Reader interface {", false},
{"Go func with receiver", "func (u *User) Name() string {", false},
{"Go goroutine", "go processItem(item)", false},
{"Go defer", "defer file.Close()", false},
{"Go channel send", "ch <- value", false},
{"Go channel receive", "value := <-ch", false},
{"Go select", "select {", false},
{"Go case", "case <-done:", false},
{"Go map literal", "m := map[string]int{}", false},
{"Go slice literal", "s := []int{1, 2, 3}", false},
{"Go error handling", "if err != nil {", false},
{"Go short var decl", "x := 5", false},
{"Go range", "for i, v := range items {", false},
// Python specific - NOT comments
{"Python def", "def main():", false},
{"Python class", "class User:", false},
{"Python async def", "async def fetch():", false},
{"Python decorator", "@property", false},
{"Python with", "with open('file') as f:", false},
{"Python try", "try:", false},
{"Python except", "except ValueError as e:", false},
{"Python lambda", "fn = lambda x: x * 2", false},
{"Python list comp", "squares = [x**2 for x in range(10)]", false},
{"Python dict comp", "d = {k: v for k, v in items}", false},
{"Python f-string", "msg = f\"Hello {name}\"", false},
{"Python import from", "from typing import List", false},
{"Python type hint", "def greet(name: str) -> str:", false},
// Python comments
{"Python comment with hash", "# This is a comment", true},
{"Python inline comment would be code", "x = 5 # inline", false}, // The line starts with code
}
for _, tt := range tests {
@@ -158,6 +234,17 @@ func TestIsMeaningfulLine(t *testing.T) {
{"whitespace line", " ", false},
{"python comment", "# comment", false},
{"code with leading whitespace", " x := 5", true},
// Indented code is still meaningful
{"tab indented code", "\tfunc main() {", true},
{"deeply indented code", "\t\t\treturn result", true},
{"space indented code", " if err != nil {", true},
{"mixed indentation code", " \t for _, item := range items {", true},
{"indented closing brace", "\t\t}", true},
// Indented comments are still comments (not meaningful)
{"indented comment", "\t// TODO: fix this", false},
{"space indented comment", " # Python comment", false},
}
for _, tt := range tests {
@@ -202,3 +289,91 @@ func TestIsRenameOrMove(t *testing.T) {
})
}
}
// TestIsDocCommentLine runs IsDocCommentLine against a table of doc comments,
// regular comments, blank input and plain code, one subtest per entry.
func TestIsDocCommentLine(t *testing.T) {
	type docCase struct {
		name     string
		line     string
		expected bool
	}

	cases := []docCase{
		// Documentation comments
		{"Rust doc comment", "/// This documents the function", true},
		{"Rust doc with leading space", " /// This documents the function", true},
		{"Rust inner doc", "//! Module documentation", true},
		{"JSDoc block start", "/** @param x the value */", true},
		{"JSDoc block start with space", " /** @param x */", true},
		{"Python docstring double", "\"\"\"This is a docstring", true},
		{"Python docstring single", "'''This is a docstring", true},
		{"JSDoc annotation line", "* @param x the value", true},
		{"JSDoc annotation with extra space", "* @returns the result", true},
		{"annotation comment", "// @deprecated use newFunc instead", true},
		{"Python annotation", "# @param x the value", true},

		// Regular comments - NOT doc comments
		{"regular C comment", "// this is a comment", false},
		{"regular Python comment", "# just a comment", false},
		{"block comment start", "/* start of block */", false},
		{"block continuation", "* continuation without annotation", false},

		// Empty and whitespace
		{"empty string", "", false},
		{"whitespace only", " ", false},

		// Code - NOT doc comments
		{"Go code", "func main() {", false},
		{"Python code", "def main():", false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := IsDocCommentLine(tc.line)
			assert.Equal(t, tc.expected, got, "IsDocCommentLine(%q)", tc.line)
		})
	}
}
// TestIsCommentedOutCode runs IsCommentedOutCode against a table of
// commented-out code, ordinary prose comments, edge cases and uncommented
// code, one subtest per entry.
func TestIsCommentedOutCode(t *testing.T) {
	type codeCase struct {
		name     string
		line     string
		expected bool
	}

	cases := []codeCase{
		// Commented-out code - should return true
		{"commented variable declaration", "// const x = 5;", true},
		{"commented function call", "// fmt.Println(x)", true}, // Ends with )
		{"commented function def", "// func main() {", true},
		{"commented return", "// return nil", true},
		{"commented import", "// import fmt", true},
		{"commented if statement", "// if x > 0 {", true},
		{"commented else", "// else {", true},
		{"commented for loop", "// for i := 0; i < 10; i++ {", true},
		{"commented assignment", "// x = 10", true}, // Contains = operator
		{"commented with equals", "// x = y + 10;", true}, // Ends with ;
		{"Python commented code", "# def main():", true}, // colon at end
		{"commented arrow function", "// const fn = () => {", true},
		{"commented Go assignment", "// x := 5", true},

		// Regular comments - should return false
		{"todo comment", "// TODO: fix this", false},
		{"note comment", "// Note: this is important", false},
		{"explanation comment", "// This function handles errors", false},
		{"section comment", "// ============", false},
		{"url in comment", "// See https://example.com", false},

		// Empty and edge cases
		{"empty string", "", false},
		{"just comment prefix", "//", false},
		{"whitespace only", " ", false},

		// Code (not commented) - should return false
		{"actual code", "const x = 5;", false},
		{"actual function", "func main() {", false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := IsCommentedOutCode(tc.line)
			assert.Equal(t, tc.expected, got, "IsCommentedOutCode(%q)", tc.line)
		})
	}
}