Files
lukaszraczylo 3bd9807e50 Fixes calculations (#2)
Git Level (per commit):
    - Track unique file paths in FilesModified slice
    - FilesChanged = count of unique files in THIS commit

  Aggregator Level (per contributor):
    - Collect all file paths from all commits into a SET
    - FilesChanged = size of the unique file set

  Result:
    - Contributor.FilesChanged = count of UNIQUE files they touched
    - Repository contributor = unique files in THAT repo only
2025-12-19 10:44:00 +00:00

531 lines
15 KiB
Go

package git
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/charmbracelet/bubbles/progress"
"github.com/charmbracelet/lipgloss"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/go-git/go-git/v5/plumbing/transport/http"
"github.com/lukaszraczylo/git-velocity/internal/diff"
"github.com/lukaszraczylo/git-velocity/internal/domain/models"
)
// commitSpinnerFrames are the glyphs cycled through while the total number of
// commits is still unknown.
var commitSpinnerFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}

// commitProgressBar handles terminal progress display for commit iteration.
// While iteration is running it shows a spinner plus a running count; once
// finished it shows a full progress bar with the final count.
type commitProgressBar struct {
	progress progress.Model
	label    string
	current  int
	frame    int // index into commitSpinnerFrames of the next glyph to draw
	out      io.Writer
}

// newCommitProgressBar builds a progress display with the given label that
// writes to os.Stderr.
func newCommitProgressBar(label string) *commitProgressBar {
	return &commitProgressBar{
		progress: progress.New(
			progress.WithDefaultGradient(),
			progress.WithWidth(40),
		),
		label: label,
		out:   os.Stderr,
	}
}

// render rewrites the current terminal line as "<label> <indicator> <count> commits"
// followed by terminator. The leading carriage return makes successive calls
// overwrite each other.
func (p *commitProgressBar) render(indicator string, count int, terminator string) {
	labelStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	countStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("241"))
	fmt.Fprintf(p.out, "\r%s %s %s%s",
		labelStyle.Render(p.label),
		indicator,
		countStyle.Render(fmt.Sprintf("%d commits", count)),
		terminator,
	)
}

// update records count as the current value and redraws the line with the
// next spinner glyph.
//
// The glyph advances once per call rather than being derived from count:
// callers report progress in fixed steps (every 10 commits), so indexing by
// count would always select the same frame and the spinner would appear
// frozen.
func (p *commitProgressBar) update(count int) {
	p.current = count
	glyph := commitSpinnerFrames[p.frame]
	p.frame = (p.frame + 1) % len(commitSpinnerFrames)
	p.render(glyph, p.current, "")
}

// done replaces the spinner with a completed progress bar, prints total as the
// final count and ends the line.
func (p *commitProgressBar) done(total int) {
	p.render(p.progress.ViewAs(1.0), total, "\n")
}
// ProgressCallback is called to report progress during git operations.
// It receives one already-formatted status message per call (for example the
// "Cloning owner/name..." lines emitted while ensuring a clone exists).
type ProgressCallback func(message string)
// Repository manages local git repository operations using go-git.
// Clones live on disk under baseDir, one directory per owner/name pair.
type Repository struct {
// baseDir is the root directory that repoPath joins with owner and name.
baseDir string
// progress receives status messages; NewRepository installs a no-op so it
// can be invoked unconditionally.
progress ProgressCallback
}
// NewRepository returns a repository manager rooted at baseDir. The directory
// (including any missing parents) is created with mode 0750 when absent, and
// progress reporting starts out as a no-op until SetProgressCallback is used.
func NewRepository(baseDir string) (*Repository, error) {
	err := os.MkdirAll(baseDir, 0750)
	if err != nil {
		return nil, fmt.Errorf("failed to create base directory: %w", err)
	}

	// Discard progress messages by default so r.progress is always callable.
	silent := func(string) {}
	manager := &Repository{baseDir: baseDir, progress: silent}
	return manager, nil
}
// SetProgressCallback installs cb as the receiver of progress messages.
// A nil cb is ignored, leaving the currently installed callback in place.
func (r *Repository) SetProgressCallback(cb ProgressCallback) {
	if cb == nil {
		return
	}
	r.progress = cb
}
// repoPath returns the on-disk location of a repository's clone:
// baseDir joined with owner and then name.
func (r *Repository) repoPath(owner, name string) string {
	segments := []string{r.baseDir, owner, name}
	return filepath.Join(segments...)
}
// CloneOptions contains options for cloning a repository.
// Callers may pass a nil *CloneOptions; the clone path treats that the same
// as a zero Depth.
type CloneOptions struct {
// Depth limits the clone to the specified number of commits (0 = full clone).
// Only values greater than zero are applied; anything else results in a
// full clone.
Depth int
}
// EnsureClonedWithOptions makes sure a local copy of owner/name exists.
// When a .git entry is already present in the repository's directory the
// clone is refreshed with a fetch; otherwise the repository is cloned, using
// a shallow clone when opts requests a positive depth. token, when non-empty,
// is passed through for authentication. A progress message is emitted before
// either operation.
func (r *Repository) EnsureClonedWithOptions(ctx context.Context, owner, name, token string, opts *CloneOptions) error {
	localPath := r.repoPath(owner, name)

	// A readable .git entry is taken as proof of a previous clone.
	_, statErr := os.Stat(filepath.Join(localPath, ".git"))
	if statErr == nil {
		r.progress(fmt.Sprintf(" Updating local clone of %s/%s...", owner, name))
		return r.fetch(ctx, localPath, token)
	}

	shallow := opts != nil && opts.Depth > 0
	switch {
	case shallow:
		r.progress(fmt.Sprintf(" Shallow cloning %s/%s (depth: %d)...", owner, name, opts.Depth))
	default:
		r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
	}
	return r.clone(ctx, owner, name, token, localPath, opts)
}
// clone performs a non-bare clone of https://github.com/<owner>/<name>.git
// into destPath using go-git. The parent directory of destPath is created
// first. A positive opts.Depth requests a shallow clone, and a non-empty token
// is sent as the password of HTTP basic auth with the "x-access-token" user.
func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string, opts *CloneOptions) error {
	parentDir := filepath.Dir(destPath)
	if err := os.MkdirAll(parentDir, 0750); err != nil {
		return fmt.Errorf("failed to create parent directory: %w", err)
	}

	// No progress writer is configured; go-git stays silent during the clone.
	settings := git.CloneOptions{
		URL: fmt.Sprintf("https://github.com/%s/%s.git", owner, name),
	}
	if opts != nil && opts.Depth > 0 {
		settings.Depth = opts.Depth
	}
	if token != "" {
		settings.Auth = &http.BasicAuth{Username: "x-access-token", Password: token}
	}

	if _, err := git.PlainCloneContext(ctx, destPath, false, &settings); err != nil {
		return fmt.Errorf("failed to clone repository: %w", err)
	}
	return nil
}
// fetch updates the clone at repoPath from its "origin" remote using go-git.
// Every remote ref is force-mirrored onto the same local ref name
// ("+refs/*:refs/*") and pruning is enabled. A non-empty token is sent as the
// password of HTTP basic auth with the "x-access-token" user. An
// "already up to date" result is treated as success.
func (r *Repository) fetch(ctx context.Context, repoPath, token string) error {
	repo, openErr := git.PlainOpen(repoPath)
	if openErr != nil {
		return fmt.Errorf("failed to open repository: %w", openErr)
	}

	settings := git.FetchOptions{
		RemoteName: "origin",
		RefSpecs:   []config.RefSpec{"+refs/*:refs/*"},
		Force:      true,
		Prune:      true,
	}
	if token != "" {
		settings.Auth = &http.BasicAuth{Username: "x-access-token", Password: token}
	}

	switch fetchErr := repo.FetchContext(ctx, &settings); fetchErr {
	case nil, git.NoErrAlreadyUpToDate:
		return nil
	default:
		return fmt.Errorf("failed to fetch: %w", fetchErr)
	}
}
// FetchCommits retrieves commits from the local clone of owner/name using
// go-git.
//
// Every branch, remote-tracking branch and tag is walked, and each distinct
// commit is reported at most once. A commit is included when its author date
// lies within [since, until]; a nil bound leaves that side of the window open.
// Per-commit line and file statistics come from getCommitStats.
//
// To avoid walking the full history, the walk of a single ref stops early
// when either
//   - a commit was committed more than 7 days before since, or
//   - 100 consecutive commits were committed before since.
//
// These stop conditions are evaluated against the committer timestamp because
// the walk itself is ordered by committer time. Author dates are not ordered
// along that walk (a rebase or cherry-pick keeps the old author date), so
// using them here would let one recently rebased commit cut off an entire
// branch.
//
// An error is returned when the repository cannot be opened, its references
// cannot be listed, ctx is cancelled, or a walk fails for a reason other than
// hitting the boundary of a shallow clone.
func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since, until *time.Time) ([]models.Commit, error) {
	repoPath := r.repoPath(owner, name)
	repo, err := git.PlainOpen(repoPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open repository: %w", err)
	}

	// Get all references to iterate all branches
	refs, err := repo.References()
	if err != nil {
		return nil, fmt.Errorf("failed to get references: %w", err)
	}

	// Commits reachable from several refs must only be reported once.
	seenCommits := make(map[plumbing.Hash]bool)
	var commits []models.Commit
	testPatterns := []string{"_test.go", ".test.", ".spec.", "/tests/", "/test/", "__tests__"}

	// Progress bar for commit iteration
	pbar := newCommitProgressBar(" Iterating commits:")
	processedCount := 0

	// Hard cutoff: 1 week before the start date. A ref's walk ends as soon as
	// it reaches a commit committed before this point.
	var hardCutoff *time.Time
	if since != nil {
		cutoff := since.AddDate(0, 0, -7)
		hardCutoff = &cutoff
	}

	// errStopIteration signals early termination of one ref's walk; it is
	// never surfaced to the caller.
	errStopIteration := fmt.Errorf("stop iteration")

	err = refs.ForEach(func(ref *plumbing.Reference) error {
		// Only branches, remote-tracking branches and tags are walked.
		refName := ref.Name()
		if !refName.IsBranch() && !refName.IsRemote() && !refName.IsTag() {
			return nil
		}

		// Get commit iterator for this reference
		commitIter, err := repo.Log(&git.LogOptions{
			From:  ref.Hash(),
			Order: git.LogOrderCommitterTime,
			All:   false,
		})
		if err != nil {
			// Skip refs that don't point to commits
			return nil
		}

		consecutiveOld := 0
		err = commitIter.ForEach(func(c *object.Commit) error {
			// Check context cancellation
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}

			// Skip already seen commits
			if seenCommits[c.Hash] {
				return nil
			}
			seenCommits[c.Hash] = true
			processedCount++

			// Update progress every 10 commits to avoid too much I/O
			if processedCount%10 == 0 {
				pbar.update(processedCount)
			}

			authoredAt := c.Author.When
			committedAt := c.Committer.When

			// Hard cutoff - stop this ref once the walk is past the cutoff.
			if hardCutoff != nil && committedAt.Before(*hardCutoff) {
				return errStopIteration
			}

			// Early termination: 100 consecutive commits older than the
			// window mean the rest of this ref is out of range as well.
			if since != nil && committedAt.Before(*since) {
				consecutiveOld++
				if consecutiveOld >= 100 {
					return errStopIteration
				}
			} else {
				consecutiveOld = 0
			}

			// Filter by date range (author date decides membership).
			if since != nil && authoredAt.Before(*since) {
				return nil
			}
			if until != nil && authoredAt.After(*until) {
				return nil
			}

			// Get file stats for this commit
			stats := r.getCommitStats(c, testPatterns)

			sha := c.Hash.String()
			commits = append(commits, models.Commit{
				SHA:     sha,
				Message: strings.Split(c.Message, "\n")[0], // First line only
				Author: models.Author{
					Login: extractLoginFromEmail(c.Author.Email, c.Author.Name),
					Name:  c.Author.Name,
					Email: c.Author.Email,
				},
				Committer: models.Author{
					Login: extractLoginFromEmail(c.Committer.Email, c.Committer.Name),
					Name:  c.Committer.Name,
					Email: c.Committer.Email,
				},
				Date:                   authoredAt,
				Additions:              stats.Additions,
				Deletions:              stats.Deletions,
				MeaningfulAdditions:    stats.MeaningfulAdditions,
				MeaningfulDeletions:    stats.MeaningfulDeletions,
				CommentAdditions:       stats.CommentAdditions,
				CommentDeletions:       stats.CommentDeletions,
				DocCommentAdditions:    stats.DocCommentAdditions,
				DocCommentDeletions:    stats.DocCommentDeletions,
				CommentedCodeAdditions: stats.CommentedCodeAdditions,
				CommentedCodeDeletions: stats.CommentedCodeDeletions,
				FilesChanged:           stats.FilesChanged,
				FilesModified:          stats.FilesModified,
				Repository:             fmt.Sprintf("%s/%s", owner, name),
				URL:                    fmt.Sprintf("https://github.com/%s/%s/commit/%s", owner, name, sha),
				HasTests:               stats.HasTests,
			})
			return nil
		})

		// Handle expected termination conditions
		if err == errStopIteration {
			return nil // Not an error, just early termination for this ref
		}
		// Handle shallow clone boundary - "object not found" means we've reached
		// the edge of the shallow clone history, which is expected behavior
		if err != nil && isShallowBoundaryError(err) {
			err = nil // Treat as normal end of history
		}
		return err
	})

	// Complete progress bar
	pbar.done(len(commits))

	if err != nil {
		return nil, fmt.Errorf("failed to iterate commits: %w", err)
	}
	return commits, nil
}
// commitStats holds the per-commit statistics gathered by getCommitStats.
// Each Additions/Deletions pair counts added and removed lines respectively.
type commitStats struct {
	// Total lines across all non-binary file patches.
	Additions, Deletions int
	// Lines accepted by diff.IsMeaningfulLine.
	MeaningfulAdditions, MeaningfulDeletions int
	// Lines that are not meaningful but match diff.IsCommentLine.
	CommentAdditions, CommentDeletions int
	// Comment lines that match diff.IsDocCommentLine.
	DocCommentAdditions, DocCommentDeletions int
	// Comment lines that are not doc comments but match diff.IsCommentedOutCode.
	CommentedCodeAdditions, CommentedCodeDeletions int
	// Number of unique files counted for the commit.
	FilesChanged int
	// List of file paths modified.
	FilesModified []string
	// True when at least one counted file path matches a test pattern.
	HasTests bool
}
// getCommitStats calculates additions, deletions and files changed for a
// commit by diffing its tree against the tree of its first parent. A commit
// without parents is diffed against the empty tree, so every file counts as
// added.
//
// testPatterns are substrings; a counted file whose path contains any of them
// sets HasTests. Documentation files (per diff.IsDocumentationFile) are
// ignored entirely. Renames and moves do not count towards FilesChanged, but
// their content changes still contribute to the line totals. Binary patches
// contribute no lines.
//
// The function is best-effort: when a tree, the parent commit or the diff
// cannot be loaded, the statistics gathered so far (possibly all zero) are
// returned. In particular a commit whose parent is recorded but unavailable,
// as happens at the boundary of a shallow clone, yields empty stats instead
// of being mistaken for a root commit that added the whole tree.
func (r *Repository) getCommitStats(c *object.Commit, testPatterns []string) commitStats {
	// Chunk operation codes as defined by go-git's diff format package.
	const (
		chunkAdd    = 1
		chunkDelete = 2
	)

	stats := commitStats{}

	currentTree, err := c.Tree()
	if err != nil {
		return stats
	}

	// Get changes between parent and current
	var changes object.Changes
	if c.NumParents() == 0 {
		// Initial commit - all files are additions
		changes, err = object.DiffTree(nil, currentTree)
	} else {
		parent, parentErr := c.Parent(0)
		if parentErr != nil {
			// Parent exists but is not available locally; there is nothing
			// meaningful to diff against.
			return stats
		}
		parentTree, treeErr := parent.Tree()
		if treeErr != nil {
			return stats
		}
		changes, err = parentTree.Diff(currentTree)
	}
	if err != nil {
		return stats
	}

	// tally classifies one changed line into the counters it is given.
	tally := func(line string, total, meaningful, comment, docComment, commentedCode *int) {
		*total++
		switch {
		case diff.IsMeaningfulLine(line):
			*meaningful++
		case diff.IsCommentLine(line):
			*comment++
			// Further classify the comment type
			if diff.IsDocCommentLine(line) {
				*docComment++
			} else if diff.IsCommentedOutCode(line) {
				*commentedCode++
			}
		}
		// Whitespace lines are neither meaningful nor comments
	}

	filesSet := make(map[string]bool)
	for _, change := range changes {
		// Get the file path (prefer destination for renames/moves, fallback to source)
		filePath := change.To.Name
		if filePath == "" {
			filePath = change.From.Name
		}
		// Skip if no file path (shouldn't happen, but defensive)
		if filePath == "" {
			continue
		}
		// Skip documentation files entirely
		if diff.IsDocumentationFile(filePath) {
			continue
		}

		// Count unique files (but NOT for renames - the file already existed)
		isRename := diff.IsRenameOrMove(change.From.Name, change.To.Name)
		if !isRename && !filesSet[filePath] {
			filesSet[filePath] = true
			stats.FilesChanged++
			stats.FilesModified = append(stats.FilesModified, filePath)
			// Check for test files
			for _, pattern := range testPatterns {
				if strings.Contains(filePath, pattern) {
					stats.HasTests = true
					break
				}
			}
		}

		// Get patch to count lines (even for renames, there may be content changes)
		patch, err := change.Patch()
		if err != nil {
			continue
		}
		for _, filePatch := range patch.FilePatches() {
			// For binary files, skip line counting
			if filePatch.IsBinary() {
				continue
			}
			for _, chunk := range filePatch.Chunks() {
				content := chunk.Content()
				if content == "" {
					continue
				}
				// Chunk text normally ends with a newline; drop that single
				// terminator so Split does not produce a phantom empty line
				// that would inflate the totals by one per chunk.
				lines := strings.Split(strings.TrimSuffix(content, "\n"), "\n")

				switch chunk.Type() {
				case chunkAdd:
					for _, line := range lines {
						tally(line,
							&stats.Additions,
							&stats.MeaningfulAdditions,
							&stats.CommentAdditions,
							&stats.DocCommentAdditions,
							&stats.CommentedCodeAdditions,
						)
					}
				case chunkDelete:
					for _, line := range lines {
						tally(line,
							&stats.Deletions,
							&stats.MeaningfulDeletions,
							&stats.CommentDeletions,
							&stats.DocCommentDeletions,
							&stats.CommentedCodeDeletions,
						)
					}
				}
			}
		}
	}
	return stats
}
// isShallowBoundaryError reports whether err looks like the failure go-git
// produces when a history walk runs past the edge of a shallow clone, i.e.
// its message contains "object not found". A nil err yields false.
func isShallowBoundaryError(err error) bool {
	return err != nil && strings.Contains(err.Error(), "object not found")
}
// loginSanitizer matches every character that is not a lowercase ASCII
// letter, a digit or a hyphen. It is compiled once at package scope because
// extractLoginFromEmail runs for the author and committer of every commit.
var loginSanitizer = regexp.MustCompile(`[^a-z0-9-]`)

// extractLoginFromEmail tries to extract a GitHub login from email.
//
// GitHub noreply addresses carry the login in their local part, either as
// "12345678+username@users.noreply.github.com" or as
// "username@users.noreply.github.com"; the part after the "+" (or the whole
// local part when there is none) is returned unchanged.
//
// For any other address the login is derived from fallbackName: it is
// lowercased and every character outside [a-z0-9-] is replaced with "-".
func extractLoginFromEmail(email, fallbackName string) string {
	if strings.Contains(email, "@users.noreply.github.com") {
		// The Contains check guarantees an "@" is present.
		localPart := email[:strings.Index(email, "@")]
		// Remove numeric prefix if present (e.g., "12345678+username")
		if idx := strings.Index(localPart, "+"); idx != -1 {
			return localPart[idx+1:]
		}
		return localPart
	}

	// Fallback: use sanitized name as login
	return loginSanitizer.ReplaceAllString(strings.ToLower(fallbackName), "-")
}