Fixes FilesChanged calculations to count unique files (#2)

Git Level (per commit):
    - Track unique file paths in FilesModified slice
    - FilesChanged = count of unique files in THIS commit

  Aggregator Level (per contributor):
    - Collect all file paths from all commits into a SET
    - FilesChanged = size of the unique file set

  Result:
    - Contributor.FilesChanged = count of UNIQUE files they touched
    - Per-repository Contributor.FilesChanged = count of unique files touched in THAT repo only
This commit is contained in:
2025-12-19 10:44:00 +00:00
committed by GitHub
parent aedcf87338
commit 3bd9807e50
8 changed files with 420 additions and 57 deletions
+38 -2
View File
@@ -72,6 +72,11 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
// Per-repo activity days
repoActivityDays := make(map[string]map[string]map[string]bool) // repo -> login -> set of date strings
// Track unique files per contributor for accurate FilesChanged count
contributorFiles := make(map[string]map[string]bool) // login -> set of file paths
// Per-repo unique files per contributor
repoContributorFiles := make(map[string]map[string]map[string]bool) // repo -> login -> set of file paths
// Helper to get or create per-repo contributor
getRepoContributor := func(repo, login, name, avatarURL string) *models.ContributorMetrics {
if repoContributorMap[repo] == nil {
@@ -141,7 +146,13 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
cm.MeaningfulLinesDeleted += commit.MeaningfulDeletions
cm.CommentLinesAdded += commit.CommentAdditions
cm.CommentLinesDeleted += commit.CommentDeletions
cm.FilesChanged += commit.FilesChanged
// Track unique files (don't sum - we'll count unique files at the end)
if contributorFiles[login] == nil {
contributorFiles[login] = make(map[string]bool)
}
for _, filePath := range commit.FilesModified {
contributorFiles[login][filePath] = true
}
// Update per-repo contributor stats
rcm := getRepoContributor(commit.Repository, login, cm.Name, cm.AvatarURL)
@@ -152,7 +163,16 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
rcm.MeaningfulLinesDeleted += commit.MeaningfulDeletions
rcm.CommentLinesAdded += commit.CommentAdditions
rcm.CommentLinesDeleted += commit.CommentDeletions
rcm.FilesChanged += commit.FilesChanged
// Track unique files per repo (don't sum - we'll count unique files at the end)
if repoContributorFiles[commit.Repository] == nil {
repoContributorFiles[commit.Repository] = make(map[string]map[string]bool)
}
if repoContributorFiles[commit.Repository][login] == nil {
repoContributorFiles[commit.Repository][login] = make(map[string]bool)
}
for _, filePath := range commit.FilesModified {
repoContributorFiles[commit.Repository][login][filePath] = true
}
// Track activity patterns based on commit time
hour := commit.Date.Hour()
@@ -253,6 +273,13 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
}
}
// Calculate unique files changed for each contributor
for login, files := range contributorFiles {
if cm, ok := contributorMap[login]; ok {
cm.FilesChanged = len(files)
}
}
// Track PRs with changes requested per contributor
prChangesRequested := make(map[string]map[int]bool) // login -> set of PR numbers with changes requested
@@ -579,6 +606,15 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
}
}
// Calculate unique files changed for per-repo contributors
if repoFiles, ok := repoContributorFiles[repo]; ok {
for login, files := range repoFiles {
if rcm, ok := repoContribs[login]; ok {
rcm.FilesChanged = len(files)
}
}
}
// Calculate averages for per-repo contributors
for login, rcm := range repoContribs {
if rcm.PRsMerged > 0 {