Compare commits

...

5 Commits

17 changed files with 1914 additions and 568 deletions
+2 -11
View File
@@ -393,9 +393,9 @@ scoring:
points:
commit: 10
commit_with_tests: 15
# Line scoring always uses meaningful lines (excludes comments/whitespace)
lines_added: 0.1
lines_deleted: 0.05
use_meaningful_lines: true # Exclude comments/whitespace from line scoring
pr_opened: 25
pr_merged: 50
pr_reviewed: 30
@@ -428,7 +428,6 @@ options:
additional_bot_patterns:
- "my-org-bot"
- "jenkins*"
use_local_git: true
clone_directory: "./.repos"
user_aliases:
- github_login: "username"
@@ -481,7 +480,7 @@ options:
### Meaningful Lines Filtering
By default, Git Velocity filters out non-meaningful code changes when scoring line additions and deletions. This provides a more accurate measure of actual code contributions.
Git Velocity always filters out non-meaningful code changes when scoring line additions and deletions. This provides an accurate measure of actual code contributions.
**What's filtered out:**
- **Comments**: Single-line (`//`, `#`, `--`), block (`/* */`, `<!-- -->`), docstrings (`"""`, `'''`)
@@ -496,14 +495,6 @@ By default, Git Velocity filters out non-meaningful code changes when scoring li
- VB: `'`
- HTML/XML: `<!-- -->`
To disable this filtering and score raw line counts:
```yaml
scoring:
points:
use_meaningful_lines: false # Score all lines including comments/whitespace
```
### Environment Variables
All configuration values support environment variable expansion:
+1 -2
View File
@@ -87,10 +87,9 @@ scoring:
points:
commit: 10
commit_with_tests: 15
# Line scoring always uses meaningful lines (excludes comments/whitespace)
lines_added: 0.1
lines_deleted: 0.05
# Use meaningful lines (excludes comments/whitespace) for scoring
use_meaningful_lines: true
pr_opened: 25
pr_merged: 50
pr_reviewed: 30
+2 -2
View File
@@ -335,8 +335,8 @@ Where:
</div>
</div>
<p class="text-sm text-gray-500 dark:text-gray-400 mt-4">
<i class="fas fa-cog mr-1"></i>
Disable with <code class="text-pink-600 dark:text-pink-400">use_meaningful_lines: false</code> in config to use raw line counts.
<i class="fas fa-info-circle mr-1"></i>
Meaningful lines filtering is always enabled to accurately reflect code contributions.
</p>
</div>
</div>
-1
View File
@@ -831,7 +831,6 @@
<span class="text-pink-400">options:</span>
<span class="text-purple-400">concurrent_requests:</span> 5
<span class="text-purple-400">include_bots:</span> false
<span class="text-purple-400">use_local_git:</span> true
<span class="text-purple-400">user_aliases:</span>
- <span class="text-indigo-400">github_login:</span> "johndoe"
<span class="text-indigo-400">emails:</span> ["john@work.com", "john@personal.com"]</code></pre>
+28 -2
View File
@@ -1,13 +1,18 @@
module github.com/lukaszraczylo/git-velocity
go 1.24.0
go 1.24.2
require (
github.com/bradleyfalzon/ghinstallation/v2 v2.17.0
github.com/charmbracelet/bubbles v0.21.0
github.com/charmbracelet/lipgloss v1.1.0
github.com/go-git/go-git/v5 v5.16.4
github.com/goccy/go-json v0.10.5
github.com/google/go-github/v68 v68.0.0
github.com/shurcooL/githubv4 v0.0.0-20240727222349-48295856cce7
github.com/spf13/cobra v1.10.2
github.com/stretchr/testify v1.11.1
golang.org/x/oauth2 v0.34.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -15,13 +20,23 @@ require (
dario.cat/mergo v1.0.2 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/go-crypto v1.3.0 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/bubbletea v1.3.10 // indirect
github.com/charmbracelet/colorprofile v0.4.1 // indirect
github.com/charmbracelet/harmonica v0.2.0 // indirect
github.com/charmbracelet/x/ansi v0.11.3 // indirect
github.com/charmbracelet/x/cellbuf v0.0.14 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
github.com/clipperhouse/displaywidth v0.6.1 // indirect
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.3.0 // indirect
github.com/cloudflare/circl v1.6.1 // indirect
github.com/cyphar/filepath-securejoin v0.6.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
github.com/go-git/go-billy/v5 v5.7.0 // indirect
github.com/goccy/go-json v0.10.5 // indirect
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
github.com/google/go-github/v75 v75.0.0 // indirect
@@ -30,14 +45,25 @@ require (
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/kevinburke/ssh_config v1.4.0 // indirect
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.19 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/pjbgf/sha1cd v0.5.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sergi/go-diff v1.4.0 // indirect
github.com/shurcooL/graphql v0.0.0-20230722043721-ed46e5a46466 // indirect
github.com/skeema/knownhosts v1.3.2 // indirect
github.com/spf13/pflag v1.0.10 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/crypto v0.46.0 // indirect
golang.org/x/net v0.48.0 // indirect
golang.org/x/sys v0.39.0 // indirect
golang.org/x/text v0.32.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
)
+52
View File
@@ -9,8 +9,32 @@ github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFI
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/bradleyfalzon/ghinstallation/v2 v2.17.0 h1:SmbUK/GxpAspRjSQbB6ARvH+ArzlNzTtHydNyXUQ6zg=
github.com/bradleyfalzon/ghinstallation/v2 v2.17.0/go.mod h1:vuD/xvJT9Y+ZVZRv4HQ42cMyPFIYqpc7AbB4Gvt/DlY=
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
github.com/charmbracelet/harmonica v0.2.0 h1:8NxJWRWg/bzKqqEaaeFNipOu77YR5t8aSwG4pgaUBiQ=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.11.3 h1:6DcVaqWI82BBVM/atTyq6yBoRLZFBsnoDoX9GCu2YOI=
github.com/charmbracelet/x/ansi v0.11.3/go.mod h1:yI7Zslym9tCJcedxz5+WBq+eUGMJT0bM06Fqy1/Y4dI=
github.com/charmbracelet/x/cellbuf v0.0.14 h1:iUEMryGyFTelKW3THW4+FfPgi4fkmKnnaLOXuc+/Kj4=
github.com/charmbracelet/x/cellbuf v0.0.14/go.mod h1:P447lJl49ywBbil/KjCk2HexGh4tEY9LH0/1QrZZ9rA=
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
github.com/clipperhouse/displaywidth v0.6.1 h1:/zMlAezfDzT2xy6acHBzwIfyu2ic0hgkT83UX5EY2gY=
github.com/clipperhouse/displaywidth v0.6.1/go.mod h1:R+kHuzaYWFkTm7xoMmK1lFydbci4X2CicfbGstSGg0o=
github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
github.com/clipperhouse/uax29/v2 v2.3.0 h1:SNdx9DVUqMoBuBoW3iLOj4FQv3dN5mDtuqwuhIGpJy4=
github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0=
github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
@@ -23,6 +47,8 @@ github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o
github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
@@ -63,6 +89,20 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
github.com/pjbgf/sha1cd v0.5.0 h1:a+UkboSi1znleCDUNT3M5YxjOnN1fz2FhN48FlwCxs0=
@@ -71,11 +111,17 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw=
github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/shurcooL/githubv4 v0.0.0-20240727222349-48295856cce7 h1:cYCy18SHPKRkvclm+pWm1Lk4YrREb4IOIb/YdFO0p2M=
github.com/shurcooL/githubv4 v0.0.0-20240727222349-48295856cce7/go.mod h1:zqMwyHmnN/eDOZOdiTohqIUKUrTFX62PNlu7IJdu0q8=
github.com/shurcooL/graphql v0.0.0-20230722043721-ed46e5a46466 h1:17JxqqJY66GmZVHkmAsGEkcIu0oCe3AM420QDgGwZx0=
github.com/shurcooL/graphql v0.0.0-20230722043721-ed46e5a46466/go.mod h1:9dIRpgIY7hVhoqfe0/FcYp0bpInZaT7dc3BYOprrIUE=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/skeema/knownhosts v1.3.2 h1:EDL9mgf4NzwMXCTfaxSD/o/a5fxDw/xL9nkU28JjdBg=
github.com/skeema/knownhosts v1.3.2/go.mod h1:bEg3iQAuw+jyiw+484wwFJoKSLwcfd7fqRy+N0QTiow=
@@ -91,6 +137,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
@@ -100,12 +148,16 @@ golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbR
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+76 -8
View File
@@ -365,7 +365,12 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
changesRequestedPRs := prChangesRequested[login]
// Count merged PRs that didn't have changes requested
for _, pr := range data.PullRequests {
if pr.Author.Login == login && pr.IsMerged() {
// Normalize PR author login before comparison
prLogin := pr.Author.Login
if mapped, ok := loginToLogin[prLogin]; ok {
prLogin = mapped
}
if prLogin == login && pr.IsMerged() {
if changesRequestedPRs == nil || !changesRequestedPRs[pr.Number] {
cm.PerfectPRs++
}
@@ -437,12 +442,18 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
}
// Count issue references in commits (e.g., "fixes #123", "closes #456", "refs #789")
// Skip merge commits which naturally contain #PR numbers
for _, commit := range data.Commits {
login := commit.Author.Login
if login == "" {
continue
}
// Skip merge commits - they contain #PR numbers that shouldn't count as issue refs
if isMergeCommit(commit.Message) {
continue
}
// Normalize login
if mappedLogin, ok := emailToLogin[commit.Author.Email]; ok {
login = mappedLogin
@@ -465,6 +476,22 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
}
}
// Build reverse mapping: raw PR author login -> normalized login
// This is needed because contributorMap keys are normalized but pr.Author.Login is not
prAuthorToNormalizedLogin := make(map[string]string)
for _, pr := range data.PullRequests {
rawLogin := pr.Author.Login
if rawLogin == "" {
continue
}
normalizedLogin := rawLogin
// Check if this raw login maps to a different normalized login
if mapped, ok := loginToLogin[rawLogin]; ok {
normalizedLogin = mapped
}
prAuthorToNormalizedLogin[rawLogin] = normalizedLogin
}
// Calculate averages and finalize contributor metrics
for login, cm := range contributorMap {
// Calculate average time to merge
@@ -481,7 +508,12 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
if cm.PRsOpened > 0 {
totalPRLines := 0
for _, pr := range data.PullRequests {
if pr.Author.Login == login {
// Normalize PR author login before comparison
prLogin := pr.Author.Login
if normalized, ok := prAuthorToNormalizedLogin[prLogin]; ok {
prLogin = normalized
}
if prLogin == login {
totalPRLines += pr.TotalChanges()
}
}
@@ -531,7 +563,12 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
if rcm.PRsOpened > 0 {
totalPRLines := 0
for _, pr := range data.PullRequests {
if pr.Author.Login == login && pr.Repository == repo {
// Normalize PR author login before comparison
prLogin := pr.Author.Login
if mapped, ok := loginToLogin[prLogin]; ok {
prLogin = mapped
}
if prLogin == login && pr.Repository == repo {
totalPRLines += pr.TotalChanges()
}
}
@@ -540,7 +577,12 @@ func (a *Aggregator) Aggregate(data *models.RawData, dateRange *config.ParsedDat
// Calculate perfect PRs for this repo
for _, pr := range data.PullRequests {
if pr.Author.Login == login && pr.Repository == repo && pr.IsMerged() {
// Normalize PR author login before comparison
prLogin := pr.Author.Login
if mapped, ok := loginToLogin[prLogin]; ok {
prLogin = mapped
}
if prLogin == login && pr.Repository == repo && pr.IsMerged() {
changesRequestedPRs := prChangesRequested[login]
if changesRequestedPRs == nil || !changesRequestedPRs[pr.Number] {
rcm.PerfectPRs++
@@ -1332,8 +1374,10 @@ func calculateStreaks(days map[string]bool) (longest, current int) {
streak := 1
for i := 1; i < len(dates); i++ {
diff := dates[i].Sub(dates[i-1]).Hours() / 24
if diff == 1 {
// Use integer day difference to avoid floating point precision issues with DST
diffHours := dates[i].Sub(dates[i-1]).Hours()
diffDays := int(diffHours/24 + 0.5) // Round to nearest integer
if diffDays == 1 {
streak++
if streak > longest {
longest = streak
@@ -1345,8 +1389,10 @@ func calculateStreaks(days map[string]bool) (longest, current int) {
// Check if current streak is still active (last activity was today or yesterday)
today := time.Now().Truncate(24 * time.Hour)
lastActive := dates[len(dates)-1]
daysSinceLastActive := today.Sub(lastActive).Hours() / 24
// Truncate lastActive to midnight as well for consistent comparison
lastActive := dates[len(dates)-1].Truncate(24 * time.Hour)
diffHours := today.Sub(lastActive).Hours()
daysSinceLastActive := int(diffHours/24 + 0.5) // Round to nearest integer
if daysSinceLastActive <= 1 {
current = streak
@@ -1385,3 +1431,25 @@ func countIssueReferences(message string) int {
return count
}
// isMergeCommit reports whether message looks like a merge commit, i.e. it
// begins with the word "Merge" or "merge" followed by a single space.
//
// Callers use this to skip merge commits when counting issue references,
// because messages such as "Merge pull request #123 from ..." carry PR
// numbers that are not issue references. Typical matches:
//   - "Merge pull request #123 from ..."
//   - "Merge branch 'feature' into ..."
//   - "Merge remote-tracking branch ..."
//   - "Merge commit ..."
//
// Only the first letter may vary in case; "MERGE " does not match.
func isMergeCommit(message string) bool {
	const prefixLen = len("Merge ")

	// Anything shorter than the prefix cannot match (and must not be sliced).
	if len(message) < prefixLen {
		return false
	}

	switch message[:prefixLen] {
	case "Merge ", "merge ":
		return true
	default:
		return false
	}
}
@@ -0,0 +1,220 @@
package aggregator
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestStreakCalculation_FloatPrecisionBug is a regression test for the
// day-difference arithmetic in calculateStreaks. An earlier implementation
// compared a floating point "hours / 24" value against exactly 1, which is
// fragile whenever two consecutive calendar days are not exactly 24 hours
// apart. Every subtest feeds three consecutive calendar days and requires the
// longest streak to be exactly 3.
//
// calculateStreaks only receives "2006-01-02" date keys, so the wall-clock
// times and locations used to build the keys below must not influence the
// result.
func TestStreakCalculation_FloatPrecisionBug(t *testing.T) {
	t.Parallel()

	t.Run("consecutive days with different hours", func(t *testing.T) {
		t.Parallel()

		dates := map[string]bool{
			"2024-01-15": true,
			"2024-01-16": true,
			"2024-01-17": true,
		}

		longest, _ := calculateStreaks(dates)

		assert.Equal(t, 3, longest, "Should calculate 3-day streak correctly")
	})

	t.Run("dates with daylight saving time boundary", func(t *testing.T) {
		t.Parallel()

		// Without a time zone database LoadLocation fails and time.Date
		// would panic on the nil location, so skip instead of crashing.
		loc, err := time.LoadLocation("America/New_York")
		if err != nil {
			t.Skipf("time zone database unavailable: %v", err)
		}

		// DST starts on March 10, 2024 at 2:00 AM in New York, making that
		// local day only 23 hours long.
		day1 := time.Date(2024, 3, 9, 12, 0, 0, 0, loc)  // Day before DST
		day2 := time.Date(2024, 3, 10, 12, 0, 0, 0, loc) // DST change day
		day3 := time.Date(2024, 3, 11, 12, 0, 0, 0, loc) // Day after DST

		dates := map[string]bool{
			day1.Format("2006-01-02"): true,
			day2.Format("2006-01-02"): true,
			day3.Format("2006-01-02"): true,
		}

		longest, _ := calculateStreaks(dates)

		// A 23-hour day must still count as one calendar day.
		assert.Equal(t, 3, longest, "Should handle DST boundaries")
	})

	t.Run("consecutive days at different times of day", func(t *testing.T) {
		t.Parallel()

		// The activity timestamps are 23 and 26 hours apart, but they fall
		// on three consecutive calendar days.
		loc := time.UTC
		day1 := time.Date(2024, 1, 15, 10, 0, 0, 0, loc)
		day2 := time.Date(2024, 1, 16, 9, 0, 0, 0, loc)  // 23 hours later
		day3 := time.Date(2024, 1, 17, 11, 0, 0, 0, loc) // 26 hours later

		dates := map[string]bool{
			day1.Format("2006-01-02"): true,
			day2.Format("2006-01-02"): true,
			day3.Format("2006-01-02"): true,
		}

		longest, _ := calculateStreaks(dates)

		assert.Equal(t, 3, longest, "Time of day must not break a streak of consecutive days")
	})
}
// TestStreakCalculation_CurrentStreakBoundaryCondition checks that a streak
// whose last active day is yesterday is still reported as the current streak.
//
// calculateStreaks derives "today" with time.Now().Truncate(24 * time.Hour),
// which is midnight UTC, so the date keys here are formatted in UTC as well.
// Formatting them in the local zone would make the input land on a different
// calendar day depending on where the test runs.
func TestStreakCalculation_CurrentStreakBoundaryCondition(t *testing.T) {
	t.Parallel()

	t.Run("last activity exactly 1 day ago", func(t *testing.T) {
		t.Parallel()

		yesterday := time.Now().UTC().Add(-24 * time.Hour)

		dates := map[string]bool{
			yesterday.Format("2006-01-02"): true,
		}

		_, current := calculateStreaks(dates)

		// One day of inactivity is within the "today or yesterday" window,
		// so the single-day streak is still current.
		assert.Equal(t, 1, current, "Activity yesterday should keep the streak current")
	})

	t.Run("last activity exactly at boundary", func(t *testing.T) {
		t.Parallel()

		// Exactly 24 hours before today's UTC midnight, i.e. the very start
		// of yesterday (UTC).
		todayMidnight := time.Now().UTC().Truncate(24 * time.Hour)
		exactlyOneDayAgo := todayMidnight.Add(-24 * time.Hour)

		dates := map[string]bool{
			exactlyOneDayAgo.Format("2006-01-02"): true,
		}

		_, current := calculateStreaks(dates)

		assert.Equal(t, 1, current, "Should handle exact 24-hour boundary")
	})
}
// TestStreakCalculation_EmptyOrSingleDate covers the smallest possible
// inputs to calculateStreaks: no active days at all, and exactly one.
func TestStreakCalculation_EmptyOrSingleDate(t *testing.T) {
	t.Parallel()

	t.Run("empty dates map", func(t *testing.T) {
		t.Parallel()

		// No activity means there is neither a longest nor a current streak.
		longestStreak, currentStreak := calculateStreaks(map[string]bool{})

		assert.Equal(t, 0, longestStreak)
		assert.Equal(t, 0, currentStreak)
	})

	t.Run("single date", func(t *testing.T) {
		t.Parallel()

		onlyDay := map[string]bool{"2024-01-15": true}

		longestStreak, currentStreak := calculateStreaks(onlyDay)

		assert.Equal(t, 1, longestStreak, "Single date should be streak of 1")
		// Whether the streak is still current depends on how long ago the
		// date is relative to the day the test runs, so only a lower bound
		// is checked.
		assert.GreaterOrEqual(t, currentStreak, 0)
	})
}
// TestStreakCalculation_DateParsingError pins how calculateStreaks treats
// keys that are not valid "2006-01-02" dates: they are expected to be ignored
// rather than counted or reported as an error.
func TestStreakCalculation_DateParsingError(t *testing.T) {
	t.Parallel()

	t.Run("invalid date format", func(t *testing.T) {
		t.Parallel()

		// One unparseable key next to one valid day.
		mixedKeys := map[string]bool{
			"invalid-date": true,
			"2024-01-15":   true,
		}

		longestStreak, currentStreak := calculateStreaks(mixedKeys)

		// Only the valid date contributes to the streak.
		assert.Equal(t, 1, longestStreak, "Should skip invalid dates")
		assert.GreaterOrEqual(t, currentStreak, 0)
	})
}
// TestStreakCalculation_LargeGaps verifies that a gap of more than one day
// resets the running streak, and that the longest streak is reported
// correctly whether it is unique or tied with another run of equal length.
func TestStreakCalculation_LargeGaps(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name        string
		activeDays  []string
		wantLongest int
		failMsg     string
	}{
		{
			// Jan 1-3 is the longest run; the two February days come after
			// a large gap and form a shorter streak.
			name:        "large gap between dates",
			activeDays:  []string{"2024-01-01", "2024-01-02", "2024-01-03", "2024-02-15", "2024-02-16"},
			wantLongest: 3,
			failMsg:     "Should correctly identify longest streak despite gap",
		},
		{
			// Two separate 3-day runs of equal length.
			name:        "multiple equal-length streaks",
			activeDays:  []string{"2024-01-01", "2024-01-02", "2024-01-03", "2024-02-01", "2024-02-02", "2024-02-03"},
			wantLongest: 3,
			failMsg:     "Should return longest streak when multiple equal streaks exist",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			dates := make(map[string]bool, len(tc.activeDays))
			for _, day := range tc.activeDays {
				dates[day] = true
			}

			longest, _ := calculateStreaks(dates)

			assert.Equal(t, tc.wantLongest, longest, tc.failMsg)
		})
	}
}
+151 -106
View File
@@ -58,18 +58,16 @@ func (a *App) Run(ctx context.Context) error {
a.log("%s", msg)
})
// Initialize local git repository manager if using local git
if a.config.Options.UseLocalGit {
a.log("Initializing local git repository manager...")
gitRepo, err := git.NewRepository(a.config.Options.CloneDirectory)
if err != nil {
return fmt.Errorf("failed to create git repository manager: %w", err)
}
gitRepo.SetProgressCallback(func(msg string) {
a.log("%s", msg)
})
a.gitRepo = gitRepo
// Initialize local git repository manager (always used for accurate commit data)
a.log("Initializing local git repository manager...")
gitRepo, err := git.NewRepository(a.config.Options.CloneDirectory)
if err != nil {
return fmt.Errorf("failed to create git repository manager: %w", err)
}
gitRepo.SetProgressCallback(func(msg string) {
a.log("%s", msg)
})
a.gitRepo = gitRepo
// Parse date range
dateRange, err := a.config.GetParsedDateRange()
@@ -163,31 +161,34 @@ func (a *App) collectRepoData(ctx context.Context, owner, name string, dateRange
repoName := fmt.Sprintf("%s/%s", owner, name)
a.log(" Fetching data from %s...", repoName)
// Fetch commits - use local git if enabled (much faster)
var commits []models.Commit
var err error
// Clone/update repository locally (required for accurate commit data)
token := a.config.Auth.GithubToken
if a.gitRepo != nil {
// Clone/update repository locally
token := a.config.Auth.GithubToken
cloneErr := a.gitRepo.EnsureCloned(ctx, owner, name, token)
if cloneErr != nil {
a.log(" Warning: failed to clone repository locally, falling back to API: %v", cloneErr)
// Fallback to API
commits, err = a.client.FetchCommits(ctx, owner, name, dateRange.Start, dateRange.End)
} else {
// Use local git for commits
commits, err = a.gitRepo.FetchCommits(ctx, owner, name, dateRange.Start, dateRange.End)
// Determine clone options (shallow clone if enabled)
var cloneOpts *git.CloneOptions
if a.config.Options.ShallowClone && dateRange.Start != nil {
// Get commit count since start date to determine shallow clone depth
commitCount, countErr := a.client.GetCommitCountSince(ctx, owner, name, *dateRange.Start)
if countErr != nil {
a.log(" Warning: failed to get commit count for shallow clone: %v", countErr)
// Proceed with full clone
} else if commitCount > 0 {
// Add buffer for safety margin
depth := commitCount + a.config.Options.ShallowCloneBuffer
cloneOpts = &git.CloneOptions{Depth: depth}
a.log(" Using shallow clone (depth: %d = %d commits + %d buffer)", depth, commitCount, a.config.Options.ShallowCloneBuffer)
}
} else {
// Use API for commits
commits, err = a.client.FetchCommits(ctx, owner, name, dateRange.Start, dateRange.End)
}
if err := a.gitRepo.EnsureClonedWithOptions(ctx, owner, name, token, cloneOpts); err != nil {
return fmt.Errorf("failed to clone repository %s: %w", repoName, err)
}
// Fetch commits from local git clone
commits, err := a.gitRepo.FetchCommits(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch commits: %w", err)
}
a.log(" Found %d commits", len(commits))
// Filter out bots
for _, c := range commits {
@@ -196,87 +197,77 @@ func (a *App) collectRepoData(ctx context.Context, owner, name string, dateRange
}
}
// Fetch pull requests
prs, err := a.client.FetchPullRequests(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch pull requests: %w", err)
}
a.log(" Found %d pull requests", len(prs))
for _, pr := range prs {
if !a.config.IsBot(pr.Author.Login) {
data.PullRequests = append(data.PullRequests, pr)
}
}
// Fetch reviews in parallel for all PRs (already filtered by FetchPullRequests)
if len(prs) > 0 {
a.log(" Fetching reviews for %d PRs in parallel...", len(prs))
type reviewResult struct {
reviews []models.Review
err error
}
// Use worker pool to limit concurrent requests
concurrency := a.config.Options.ConcurrentRequests
if concurrency <= 0 {
concurrency = 5
}
results := make(chan reviewResult, len(prs))
sem := make(chan struct{}, concurrency)
for _, pr := range prs {
go func(prNum int) {
sem <- struct{}{} // Acquire
defer func() { <-sem }() // Release
reviews, err := a.client.FetchReviews(ctx, owner, name, prNum)
results <- reviewResult{reviews: reviews, err: err}
}(pr.Number)
}
// Collect results
reviewCount := 0
for i := 0; i < len(prs); i++ {
result := <-results
if result.err != nil {
continue
// Fetch pull requests and reviews
// Use GraphQL if available (much fewer API calls), otherwise fall back to REST
if a.client.HasGraphQL() {
prs, reviews, err := a.client.FetchPRsWithReviewsGraphQL(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
a.log(" Warning: GraphQL fetch failed, falling back to REST: %v", err)
// Fall back to REST
prs, reviews, err = a.fetchPRsAndReviewsREST(ctx, owner, name, dateRange, data)
if err != nil {
return err
}
for _, r := range result.reviews {
if !a.config.IsBot(r.Author.Login) {
data.Reviews = append(data.Reviews, r)
reviewCount++
}
// Filter out bots
for _, pr := range prs {
if !a.config.IsBot(pr.Author.Login) {
data.PullRequests = append(data.PullRequests, pr)
}
}
for _, r := range reviews {
if !a.config.IsBot(r.Author.Login) {
data.Reviews = append(data.Reviews, r)
}
}
} else {
// Use REST API
prs, reviews, err := a.fetchPRsAndReviewsREST(ctx, owner, name, dateRange, data)
if err != nil {
return err
}
// Filter out bots and add to data
for _, pr := range prs {
if !a.config.IsBot(pr.Author.Login) {
data.PullRequests = append(data.PullRequests, pr)
}
}
for _, r := range reviews {
if !a.config.IsBot(r.Author.Login) {
data.Reviews = append(data.Reviews, r)
}
}
}
// Fetch issues and comments
// Use GraphQL if available (much fewer API calls), otherwise fall back to REST
if a.client.HasGraphQL() {
issues, comments, err := a.client.FetchIssuesWithCommentsGraphQL(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
a.log(" Warning: GraphQL fetch failed, falling back to REST: %v", err)
// Fall back to REST
if err := a.fetchIssuesAndCommentsREST(ctx, owner, name, dateRange, data); err != nil {
return err
}
} else {
// Filter out bots
for _, issue := range issues {
if !a.config.IsBot(issue.Author.Login) {
data.Issues = append(data.Issues, issue)
}
}
for _, comment := range comments {
if !a.config.IsBot(comment.Author.Login) {
data.IssueComments = append(data.IssueComments, comment)
}
}
}
a.log(" Found %d reviews across %d PRs", reviewCount, len(prs))
}
// Fetch issues
issues, err := a.client.FetchIssues(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch issues: %w", err)
}
a.log(" Found %d issues", len(issues))
for _, issue := range issues {
if !a.config.IsBot(issue.Author.Login) {
data.Issues = append(data.Issues, issue)
}
}
// Fetch issue comments
issueComments, err := a.client.FetchIssueComments(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch issue comments: %w", err)
}
a.log(" Found %d issue comments", len(issueComments))
for _, comment := range issueComments {
if !a.config.IsBot(comment.Author.Login) {
data.IssueComments = append(data.IssueComments, comment)
} else {
// Use REST API
if err := a.fetchIssuesAndCommentsREST(ctx, owner, name, dateRange, data); err != nil {
return err
}
}
@@ -337,3 +328,57 @@ func (a *App) fetchUserProfiles(ctx context.Context, data *models.RawData) (map[
return profiles, nil
}
// fetchPRsAndReviewsREST fetches pull requests and their reviews through the
// REST API. It is used when GraphQL is unavailable or its query failed.
//
// Reviews are fetched with one request per pull request. A failure for a
// single PR is logged and skipped so that one bad PR does not discard the
// rest. If the failure coincides with ctx being cancelled or expired, the
// whole operation is aborted with an error instead of issuing one doomed
// request per remaining PR and returning partial data as if it were complete.
//
// The returned slices are not bot-filtered; that is left to the caller.
// The data parameter is not used by this function and is kept only so the
// signature stays stable for existing callers.
func (a *App) fetchPRsAndReviewsREST(ctx context.Context, owner, name string, dateRange *config.ParsedDateRange, data *models.RawData) ([]models.PullRequest, []models.Review, error) {
	prs, err := a.client.FetchPullRequests(ctx, owner, name, dateRange.Start, dateRange.End)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to fetch pull requests: %w", err)
	}
	a.log(" Found %d pull requests", len(prs))

	// Fetch reviews for each PR
	var reviews []models.Review
	for _, pr := range prs {
		prReviews, err := a.client.FetchReviews(ctx, owner, name, pr.Number)
		if err != nil {
			// A dead context makes every remaining request fail too; stop
			// here rather than logging one warning per PR.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return nil, nil, fmt.Errorf("failed to fetch reviews: %w", ctxErr)
			}
			a.log(" Warning: failed to fetch reviews for PR #%d: %v", pr.Number, err)
			continue
		}
		reviews = append(reviews, prReviews...)
	}
	a.log(" Found %d reviews (REST)", len(reviews))

	return prs, reviews, nil
}
// fetchIssuesAndCommentsREST loads issues and issue comments for the date
// range via the REST API and appends every non-bot entry directly to data.
//
// A failure to list issues is returned to the caller. A failure to list
// comments is only logged as a warning, so the issues collected so far are
// kept and nil is returned.
func (a *App) fetchIssuesAndCommentsREST(ctx context.Context, owner, name string, dateRange *config.ParsedDateRange, data *models.RawData) error {
	fetchedIssues, err := a.client.FetchIssues(ctx, owner, name, dateRange.Start, dateRange.End)
	if err != nil {
		return fmt.Errorf("failed to fetch issues: %w", err)
	}
	a.log(" Found %d issues", len(fetchedIssues))

	// Keep only issues authored by humans.
	for i := range fetchedIssues {
		if a.config.IsBot(fetchedIssues[i].Author.Login) {
			continue
		}
		data.Issues = append(data.Issues, fetchedIssues[i])
	}

	// Comments are best-effort: report the problem and carry on without them.
	fetchedComments, err := a.client.FetchIssueComments(ctx, owner, name, dateRange.Start, dateRange.End)
	if err != nil {
		a.log(" Warning: failed to fetch issue comments: %v", err)
		return nil
	}

	for i := range fetchedComments {
		if a.config.IsBot(fetchedComments[i].Author.Login) {
			continue
		}
		data.IssueComments = append(data.IssueComments, fetchedComments[i])
	}
	// The logged count is the number fetched, before bot filtering.
	a.log(" Found %d issue comments (REST)", len(fetchedComments))

	return nil
}
+34 -33
View File
@@ -90,10 +90,6 @@ type PointsConfig struct {
FastReview4h int `yaml:"fast_review_4h"`
FastReview24h int `yaml:"fast_review_24h"`
OutOfHours int `yaml:"out_of_hours"` // Bonus per commit outside 9am-5pm
// UseMeaningfulLines determines whether scoring uses meaningful lines (excluding comments/whitespace)
// or raw line counts. Default is true for more accurate contribution scoring.
UseMeaningfulLines bool `yaml:"use_meaningful_lines"`
}
// AchievementConfig defines an achievement badge
@@ -149,7 +145,9 @@ type OptionsConfig struct {
IncludeBots bool `yaml:"include_bots"`
AdditionalBotPatterns []string `yaml:"additional_bot_patterns"` // User-defined patterns (added to hardcoded defaults)
CloneDirectory string `yaml:"clone_directory"` // Directory for local git clones
UseLocalGit bool `yaml:"use_local_git"` // Use local git for commits (faster)
ShallowClone bool `yaml:"shallow_clone"` // Use shallow clone based on date range (faster cloning)
ShallowCloneBuffer int `yaml:"shallow_clone_buffer"` // Extra commits to fetch beyond date range (default: 100)
UseGraphQL bool `yaml:"use_graphql"` // Use GraphQL API for batched queries (fewer API calls)
UserAliases []UserAlias `yaml:"user_aliases,omitempty"` // Manual email/name to login mappings
}
@@ -157,16 +155,18 @@ type OptionsConfig struct {
// These cannot be overridden by users to ensure consistent bot filtering
func DefaultBotPatterns() []string {
	// Each pattern appears exactly once. A new slice is built on every call,
	// so callers may append to the result without affecting other callers.
	return []string{
		"*[bot]",                   // GitHub App bots: dependabot[bot], renovate[bot], etc.
		"dependabot*",              // Dependabot variants
		"renovate*",                // Renovate bot variants
		"github-actions*",          // GitHub Actions
		"github-advanced-security", // GitHub Advanced Security
		"*-actions-runner",         // Self-hosted GitHub Actions runners
		"codecov*",                 // Codecov bot
		"snyk*",                    // Snyk security bot
		"greenkeeper*",             // Greenkeeper (legacy)
		"imgbot*",                  // Image optimization bot
		"allcontributors*",         // All Contributors bot
		"semantic-release*",        // Semantic release bot
	}
}
@@ -191,23 +191,22 @@ func DefaultConfig() *Config {
Scoring: ScoringConfig{
Enabled: true,
Points: PointsConfig{
Commit: 10,
CommitWithTests: 15,
LinesAdded: 0.1,
LinesDeleted: 0.05,
PROpened: 25,
PRMerged: 50,
PRReviewed: 30,
ReviewComment: 5,
IssueOpened: 10,
IssueClosed: 20,
IssueComment: 5,
IssueReference: 5,
FastReview1h: 50,
FastReview4h: 25,
FastReview24h: 10,
OutOfHours: 2,
UseMeaningfulLines: true, // Default to meaningful lines for accurate contribution scoring
Commit: 10,
CommitWithTests: 15,
LinesAdded: 0.1,
LinesDeleted: 0.05,
PROpened: 25,
PRMerged: 50,
PRReviewed: 30,
ReviewComment: 5,
IssueOpened: 10,
IssueClosed: 20,
IssueComment: 5,
IssueReference: 5,
FastReview1h: 50,
FastReview4h: 25,
FastReview24h: 10,
OutOfHours: 2,
},
},
Output: OutputConfig{
@@ -228,7 +227,9 @@ func DefaultConfig() *Config {
IncludeBots: false,
AdditionalBotPatterns: []string{}, // Users can add custom patterns here
CloneDirectory: "./.repos",
UseLocalGit: true, // Default to faster local git analysis
ShallowClone: true, // Default to shallow clone for faster cloning
ShallowCloneBuffer: 25, // Extra commits beyond date range for safety margin
UseGraphQL: true, // Default to GraphQL for fewer API calls
},
}
}
+11 -11
View File
@@ -80,10 +80,15 @@ func (c *Calculator) Calculate(metrics *models.GlobalMetrics) *models.GlobalMetr
return contributors[i].Score.Total > contributors[j].Score.Total
})
// Assign ranks
// Assign ranks (guard against empty slice for percentile calculation)
numContributors := len(contributors)
for i := range contributors {
contributors[i].Score.Rank = i + 1
contributors[i].Score.PercentileRank = float64(len(contributors)-i) / float64(len(contributors)) * 100
if numContributors > 0 {
contributors[i].Score.PercentileRank = float64(numContributors-i) / float64(numContributors) * 100
} else {
contributors[i].Score.PercentileRank = 0
}
}
// Build leaderboard
@@ -167,15 +172,10 @@ func (c *Calculator) calculateScore(cm *models.ContributorMetrics) models.Score
// Commit points
breakdown.Commits = cm.CommitCount * points.Commit
// Line change points - use meaningful lines if configured, otherwise raw counts
linesAdded := cm.LinesAdded
linesDeleted := cm.LinesDeleted
if points.UseMeaningfulLines {
linesAdded = cm.MeaningfulLinesAdded
linesDeleted = cm.MeaningfulLinesDeleted
}
breakdown.LineChanges = int(float64(linesAdded)*points.LinesAdded +
float64(linesDeleted)*points.LinesDeleted)
// Line change points - always use meaningful lines (excluding comments/whitespace)
// to accurately reflect actual code contribution
breakdown.LineChanges = int(float64(cm.MeaningfulLinesAdded)*points.LinesAdded +
float64(cm.MeaningfulLinesDeleted)*points.LinesDeleted)
// PR points
breakdown.PRs = cm.PRsOpened*points.PROpened + cm.PRsMerged*points.PRMerged
+9 -51
View File
@@ -71,6 +71,8 @@ func TestCalculator_BasicScoring(t *testing.T) {
CommitCount: 10,
LinesAdded: 1000,
LinesDeleted: 500,
MeaningfulLinesAdded: 1000, // Same as raw for this test
MeaningfulLinesDeleted: 500,
PRsOpened: 5,
PRsMerged: 3,
ReviewsGiven: 8,
@@ -91,7 +93,7 @@ func TestCalculator_BasicScoring(t *testing.T) {
// Verify score breakdown:
// Commits: 10 * 10 = 100
// Lines: 1000 * 0.1 + 500 * 0.05 = 100 + 25 = 125
// Lines (meaningful): 1000 * 0.1 + 500 * 0.05 = 100 + 25 = 125
// PRs: 5 * 25 + 3 * 50 = 125 + 150 = 275
// Reviews: 8 * 30 + 20 * 5 = 240 + 100 = 340
// Total: 100 + 125 + 275 + 340 = 840
@@ -860,10 +862,9 @@ func TestCalculator_MeaningfulLinesScoring(t *testing.T) {
cfg := config.DefaultConfig()
cfg.Scoring.Enabled = true
cfg.Scoring.Points = config.PointsConfig{
Commit: 10,
LinesAdded: 0.1,
LinesDeleted: 0.05,
UseMeaningfulLines: true, // Use meaningful lines
Commit: 10,
LinesAdded: 0.1,
LinesDeleted: 0.05,
}
calc := NewCalculator(cfg)
@@ -897,58 +898,15 @@ func TestCalculator_MeaningfulLinesScoring(t *testing.T) {
assert.Equal(t, 200, contributor.Score.Total)
})
t.Run("uses raw lines when disabled", func(t *testing.T) {
t.Parallel()
cfg := config.DefaultConfig()
cfg.Scoring.Enabled = true
cfg.Scoring.Points = config.PointsConfig{
Commit: 10,
LinesAdded: 0.1,
LinesDeleted: 0.05,
UseMeaningfulLines: false, // Use raw lines
}
calc := NewCalculator(cfg)
metrics := &models.GlobalMetrics{
Repositories: []models.RepositoryMetrics{
{
FullName: "owner/repo",
Contributors: []models.ContributorMetrics{
{
Login: "user1",
CommitCount: 10,
LinesAdded: 1000, // Raw lines
LinesDeleted: 500,
MeaningfulLinesAdded: 800, // Meaningful lines (should be ignored)
MeaningfulLinesDeleted: 400,
RepositoriesContributed: []string{"owner/repo"},
},
},
},
},
}
result := calc.Calculate(metrics)
contributor := result.Repositories[0].Contributors[0]
// Line change points should use raw lines:
// Raw: 1000 * 0.1 + 500 * 0.05 = 100 + 25 = 125
assert.Equal(t, 125, contributor.Score.Breakdown.LineChanges)
// Total: Commits (10 * 10 = 100) + Lines (125) = 225
assert.Equal(t, 225, contributor.Score.Total)
})
t.Run("comment-only changes score zero meaningful lines", func(t *testing.T) {
t.Parallel()
cfg := config.DefaultConfig()
cfg.Scoring.Enabled = true
cfg.Scoring.Points = config.PointsConfig{
Commit: 10,
LinesAdded: 0.1,
LinesDeleted: 0.05,
UseMeaningfulLines: true,
Commit: 10,
LinesAdded: 0.1,
LinesDeleted: 0.05,
}
calc := NewCalculator(cfg)
+132 -7
View File
@@ -3,12 +3,15 @@ package git
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/charmbracelet/bubbles/progress"
"github.com/charmbracelet/lipgloss"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
@@ -18,6 +21,55 @@ import (
"github.com/lukaszraczylo/git-velocity/internal/domain/models"
)
// commitProgressBar renders commit-iteration progress on a single terminal
// line. While iterating it shows a spinner and a running count (the total is
// not known up front); when finished it shows a full progress bar and the
// final count.
type commitProgressBar struct {
	progress progress.Model // bar rendered once by done
	label    string         // text printed before the spinner or bar
	current  int            // last count passed to update
	frame    int            // index of the spinner frame drawn by the next update
	out      io.Writer      // destination for the rendered line
}

// newCommitProgressBar creates a progress display with the given label that
// writes to os.Stderr.
func newCommitProgressBar(label string) *commitProgressBar {
	p := progress.New(
		progress.WithDefaultGradient(),
		progress.WithWidth(40),
	)
	return &commitProgressBar{
		progress: p,
		label:    label,
		out:      os.Stderr,
	}
}

// update redraws the in-progress line with the next spinner frame and count.
// The line starts with a carriage return so it overwrites the previous one.
func (p *commitProgressBar) update(count int) {
	p.current = count

	labelStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	countStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("241"))

	// Use a spinner-like display since we don't know total
	spinner := [...]string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}

	// Advance one frame per call rather than indexing by count: a caller that
	// reports in steps of ten would otherwise always land on the same frame,
	// and a negative count would index out of range.
	spinChar := spinner[p.frame]
	p.frame = (p.frame + 1) % len(spinner)

	fmt.Fprintf(p.out, "\r%s %s %s",
		labelStyle.Render(p.label),
		spinChar,
		countStyle.Render(fmt.Sprintf("%d commits", p.current)),
	)
}

// done replaces the in-progress line with a completed (100%) bar and the
// final total, then ends the line with a newline.
func (p *commitProgressBar) done(total int) {
	labelStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	countStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("241"))

	fmt.Fprintf(p.out, "\r%s %s %s\n",
		labelStyle.Render(p.label),
		p.progress.ViewAs(1.0),
		countStyle.Render(fmt.Sprintf("%d commits", total)),
	)
}
// ProgressCallback is called to report progress during git operations
type ProgressCallback func(message string)
@@ -52,8 +104,19 @@ func (r *Repository) repoPath(owner, name string) string {
return filepath.Join(r.baseDir, owner, name)
}
// CloneOptions contains options for cloning a repository.
// A nil *CloneOptions is accepted by the clone functions and means "no
// special options".
type CloneOptions struct {
// Depth limits the clone to the specified number of commits.
// The depth is only applied when it is greater than zero; 0 means a full clone.
Depth int
}
// EnsureCloned ensures a repository is cloned and up to date.
// It is a convenience wrapper that calls EnsureClonedWithOptions with nil
// options, so no shallow-clone depth is requested.
func (r *Repository) EnsureCloned(ctx context.Context, owner, name, token string) error {
return r.EnsureClonedWithOptions(ctx, owner, name, token, nil)
}
// EnsureClonedWithOptions ensures a repository is cloned with specific options
func (r *Repository) EnsureClonedWithOptions(ctx context.Context, owner, name, token string, opts *CloneOptions) error {
repoPath := r.repoPath(owner, name)
// Check if already cloned
@@ -65,12 +128,16 @@ func (r *Repository) EnsureCloned(ctx context.Context, owner, name, token string
}
// Clone the repository
r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
return r.clone(ctx, owner, name, token, repoPath)
if opts != nil && opts.Depth > 0 {
r.progress(fmt.Sprintf(" Shallow cloning %s/%s (depth: %d)...", owner, name, opts.Depth))
} else {
r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
}
return r.clone(ctx, owner, name, token, repoPath, opts)
}
// clone clones a repository using go-git
func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string) error {
func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string, opts *CloneOptions) error {
// Create parent directory
if err := os.MkdirAll(filepath.Dir(destPath), 0750); err != nil {
return fmt.Errorf("failed to create parent directory: %w", err)
@@ -83,6 +150,11 @@ func (r *Repository) clone(ctx context.Context, owner, name, token, destPath str
Progress: nil, // Could add progress writer here
}
// Apply shallow clone depth if provided
if opts != nil && opts.Depth > 0 {
cloneOpts.Depth = opts.Depth
}
// Add authentication if token provided
if token != "" {
cloneOpts.Auth = &http.BasicAuth{
@@ -138,8 +210,6 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil, fmt.Errorf("failed to open repository: %w", err)
}
r.progress(" Iterating commits with go-git...")
// Get all references to iterate all branches
refs, err := repo.References()
if err != nil {
@@ -151,6 +221,20 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
var commits []models.Commit
testPatterns := []string{"_test.go", ".test.", ".spec.", "/tests/", "/test/", "__tests__"}
// Progress bar for commit iteration
pbar := newCommitProgressBar(" Iterating commits:")
processedCount := 0
// Hard cutoff: 1 week before start date - stop iterating entirely past this point
var hardCutoff *time.Time
if since != nil {
cutoff := since.AddDate(0, 0, -7)
hardCutoff = &cutoff
}
// errStopIteration is used to signal early termination (not a real error)
var errStopIteration = fmt.Errorf("stop iteration")
err = refs.ForEach(func(ref *plumbing.Reference) error {
// Skip non-branch references
if !ref.Name().IsBranch() && !ref.Name().IsRemote() && !ref.Name().IsTag() {
@@ -168,6 +252,7 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil
}
consecutiveOld := 0
err = commitIter.ForEach(func(c *object.Commit) error {
// Check context cancellation
select {
@@ -181,13 +266,31 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil
}
seenCommits[c.Hash] = true
processedCount++
// Update progress every 10 commits to avoid too much I/O
if processedCount%10 == 0 {
pbar.update(processedCount)
}
commitTime := c.Author.When
// Hard cutoff - stop entirely if past this date
if hardCutoff != nil && commitTime.Before(*hardCutoff) {
return errStopIteration
}
// Filter by date range
if since != nil && commitTime.Before(*since) {
consecutiveOld++
// Early termination: if we've seen 100 consecutive old commits, stop this branch
if consecutiveOld >= 100 {
return errStopIteration
}
return nil
}
consecutiveOld = 0 // Reset counter when we find a valid commit
if until != nil && commitTime.After(*until) {
return nil
}
@@ -229,15 +332,27 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil
})
// Handle expected termination conditions
if err == errStopIteration {
return nil // Not an error, just early termination for this branch
}
// Handle shallow clone boundary - "object not found" means we've reached
// the edge of the shallow clone history, which is expected behavior
if err != nil && isShallowBoundaryError(err) {
err = nil // Treat as normal end of history
}
return err
})
// Complete progress bar
pbar.done(len(commits))
if err != nil {
return nil, fmt.Errorf("failed to iterate commits: %w", err)
}
r.progress(fmt.Sprintf(" Found %d commits", len(commits)))
return commits, nil
}
@@ -352,6 +467,16 @@ func (r *Repository) getCommitStats(c *object.Commit, testPatterns []string) com
return stats
}
// isShallowBoundaryError reports whether err looks like the failure seen when
// commit traversal runs past the edge of a shallow clone's history.
//
// Detection is purely textual: any non-nil error whose message contains
// "object not found" (case-sensitive) matches. A nil error never matches.
func isShallowBoundaryError(err error) bool {
	// go-git returns "object not found" when trying to access commits beyond shallow depth
	return err != nil && strings.Contains(err.Error(), "object not found")
}
// extractLoginFromEmail tries to extract GitHub login from email
func extractLoginFromEmail(email, fallbackName string) string {
// Pattern: 12345678+username@users.noreply.github.com
+13 -5
View File
@@ -165,20 +165,28 @@ func NewMemoryCache(ttl time.Duration) *MemoryCache {
// Get retrieves a value from the cache.
//
// It returns the stored value and true when key is present and has not
// expired, and nil, false otherwise. An entry found to be expired is removed
// from the cache as a side effect.
//
// Locking: the lookup runs under the read lock, which is released exactly
// once on every path. Removal of an expired entry needs the write lock, so
// the read lock is dropped first and the entry is re-checked after the write
// lock is taken, because another goroutine may have deleted or replaced it
// in between.
func (c *MemoryCache) Get(key string) (interface{}, bool) {
	c.mu.RLock()
	entry, ok := c.data[key]
	if !ok {
		c.mu.RUnlock()
		return nil, false
	}
	// Read everything needed from the entry while still holding the read lock.
	expired := time.Now().After(entry.ExpiresAt)
	value := entry.Value
	c.mu.RUnlock()

	if !expired {
		return value, true
	}

	// Expired: take the write lock to delete. Deleting under the read lock
	// would race with concurrent readers.
	c.mu.Lock()
	if current, ok := c.data[key]; ok && time.Now().After(current.ExpiresAt) {
		delete(c.data, key)
	}
	c.mu.Unlock()

	return nil, false
}
// Set stores a value in the cache
+286 -329
View File
@@ -41,6 +41,7 @@ func DefaultRetryConfig() RetryConfig {
// Client wraps the GitHub API client with rate limiting and caching
type Client struct {
gh *github.Client
gql *GraphQLClient // GraphQL client for batched queries
config *config.Config
cache cache.Cache
retry RetryConfig
@@ -91,8 +92,15 @@ func NewClient(ctx context.Context, cfg *config.Config) (*Client, error) {
c = cache.NewNoopCache()
}
// Initialize GraphQL client if using token auth (GraphQL doesn't support GitHub App auth easily)
var gql *GraphQLClient
if cfg.HasGithubToken() && cfg.Options.UseGraphQL {
gql = NewGraphQLClient(cfg.Auth.GithubToken)
}
return &Client{
gh: gh,
gql: gql,
config: cfg,
cache: c,
retry: DefaultRetryConfig(),
@@ -107,6 +115,73 @@ func (c *Client) SetProgressCallback(cb ProgressCallback) {
}
}
// HasGraphQL reports whether the GraphQL client is available, i.e. whether
// the client's gql field is non-nil. Callers check this before using the
// *GraphQL fetch methods, which return an error when it is false.
func (c *Client) HasGraphQL() bool {
return c.gql != nil
}
// FetchPRsWithReviewsGraphQL returns the pull requests and reviews for
// owner/repo within the given date range, using the GraphQL client (far
// fewer API calls than the REST path).
//
// Results are looked up in the cache first, keyed by repository and date
// range, and stored there after a successful fetch. An error is returned if
// the GraphQL client was not initialized or the underlying query fails.
func (c *Client) FetchPRsWithReviewsGraphQL(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.PullRequest, []models.Review, error) {
	if c.gql == nil {
		return nil, nil, fmt.Errorf("GraphQL client not initialized")
	}

	// cachedData bundles both result slices so they share one cache entry.
	type cachedData struct {
		PRs     []models.PullRequest
		Reviews []models.Review
	}

	cacheKey := fmt.Sprintf("gql_prs_reviews:%s/%s:%v:%v", owner, repo, since, until)

	// Serve from cache when an entry of the expected shape exists.
	cached, found := c.cache.Get(cacheKey)
	if hit, ok := cached.(cachedData); found && ok {
		c.progress(" Using cached PRs and reviews data (GraphQL)")
		return hit.PRs, hit.Reviews, nil
	}

	pullRequests, prReviews, err := c.gql.FetchPRsWithReviews(ctx, owner, repo, since, until)
	if err != nil {
		return nil, nil, err
	}

	c.cache.Set(cacheKey, cachedData{PRs: pullRequests, Reviews: prReviews})
	return pullRequests, prReviews, nil
}
// FetchIssuesWithCommentsGraphQL returns the issues and issue comments for
// owner/repo within the given date range, using the GraphQL client (far
// fewer API calls than the REST path).
//
// Results are looked up in the cache first, keyed by repository and date
// range, and stored there after a successful fetch. An error is returned if
// the GraphQL client was not initialized or the underlying query fails.
func (c *Client) FetchIssuesWithCommentsGraphQL(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, []models.IssueComment, error) {
	if c.gql == nil {
		return nil, nil, fmt.Errorf("GraphQL client not initialized")
	}

	// cachedData bundles both result slices so they share one cache entry.
	type cachedData struct {
		Issues   []models.Issue
		Comments []models.IssueComment
	}

	cacheKey := fmt.Sprintf("gql_issues_comments:%s/%s:%v:%v", owner, repo, since, until)

	// Serve from cache when an entry of the expected shape exists.
	cached, found := c.cache.Get(cacheKey)
	if hit, ok := cached.(cachedData); found && ok {
		c.progress(" Using cached issues and comments data (GraphQL)")
		return hit.Issues, hit.Comments, nil
	}

	fetchedIssues, fetchedComments, err := c.gql.FetchIssuesWithComments(ctx, owner, repo, since, until)
	if err != nil {
		return nil, nil, err
	}

	c.cache.Set(cacheKey, cachedData{Issues: fetchedIssues, Comments: fetchedComments})
	return fetchedIssues, fetchedComments, nil
}
// SetRetryConfig sets the retry configuration
func (c *Client) SetRetryConfig(rc RetryConfig) {
c.retry = rc
@@ -226,12 +301,16 @@ func isRetryableError(err error) bool {
"timeout",
"temporary failure",
"server error",
"stream error",
"CANCEL",
"EOF",
"broken pipe",
"502",
"503",
"504",
}
for _, msg := range retryableMessages {
if strings.Contains(strings.ToLower(errStr), msg) {
if strings.Contains(strings.ToLower(errStr), strings.ToLower(msg)) {
return true
}
}
@@ -272,20 +351,57 @@ func (c *Client) ListOrgRepos(ctx context.Context, org, pattern string) ([]strin
return allRepos, nil
}
// GetCommitCountSince returns the approximate number of commits since a given date.
// This is used to determine the optimal shallow clone depth.
//
// It makes a single lightweight API call with per_page=1. With one item per
// page, the last page number reported in the pagination info equals the
// number of commits. When no last page is reported the result fits on the
// single page already received, so the count is the number of commits in
// that response (0 or 1) and no second request is needed.
//
// An error is returned, wrapped with context, if the list call fails after
// retries.
func (c *Client) GetCommitCountSince(ctx context.Context, owner, repo string, since time.Time) (int, error) {
	opts := &github.CommitsListOptions{
		Since: since,
		ListOptions: github.ListOptions{
			PerPage: 1,
		},
	}

	var (
		commits []*github.RepositoryCommit
		resp    *github.Response
	)
	err := c.retryWithBackoff(ctx, "get commit count", func() error {
		var err error
		commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
		return err
	})
	if err != nil {
		return 0, fmt.Errorf("failed to get commit count: %w", err)
	}

	// GitHub returns pagination info in the response
	// LastPage indicates total number of pages (with 1 item per page = total commits)
	if resp != nil && resp.LastPage > 0 {
		return resp.LastPage, nil
	}

	// No last page: everything was on the page we already have.
	return len(commits), nil
}
// FetchCommits fetches commits from a repository within a date range
func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) {
cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if commits, ok := cached.([]models.Commit); ok {
c.progress(" Using cached commits data")
return commits, nil
}
}
var allCommits []models.Commit
opts := &github.CommitsListOptions{
ListOptions: github.ListOptions{PerPage: 100},
}
@@ -297,23 +413,19 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un
opts.Until = *until
}
page := 1
for {
var commits []*github.RepositoryCommit
var resp *github.Response
err := c.retryWithBackoff(ctx, "list commits", func() error {
var err error
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list commits: %w", err)
}
c.progress(fmt.Sprintf(" Fetching commits page %d (%d commits so far)...", page, len(allCommits)))
for i, commit := range commits {
fetcher := &EnrichingFetcher[*github.RepositoryCommit, models.Commit]{
FetchFn: func(ctx context.Context, page int) ([]*github.RepositoryCommit, *github.Response, error) {
opts.Page = page
var commits []*github.RepositoryCommit
var resp *github.Response
err := c.retryWithBackoff(ctx, "list commits", func() error {
var err error
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
return err
})
return commits, resp, err
},
EnrichFn: func(ctx context.Context, commit *github.RepositoryCommit) (models.Commit, error) {
// Fetch detailed commit info for stats
var detailed *github.RepositoryCommit
err := c.retryWithBackoff(ctx, fmt.Sprintf("get commit %s", commit.GetSHA()[:7]), func() error {
@@ -322,31 +434,24 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un
return err
})
if err != nil {
// Log and continue - we can still use basic info
c.progress(fmt.Sprintf(" Warning: failed to get commit details for %s: %v", commit.GetSHA()[:7], err))
continue
return models.Commit{}, err
}
mc := convertCommit(detailed, owner, repo)
allCommits = append(allCommits, mc)
// Progress every 10 commits
if (i+1)%10 == 0 {
c.progress(fmt.Sprintf(" Processing commit %d/%d on page %d...", i+1, len(commits), page))
return convertCommit(detailed, owner, repo), nil
},
GetDateFn: func(commit *github.RepositoryCommit) time.Time {
if commit.Commit != nil && commit.Commit.Author != nil {
return commit.Commit.Author.GetDate().Time
}
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
return time.Time{}
},
Since: since,
Until: until,
}
// Cache results
c.cache.Set(cacheKey, allCommits)
config := DefaultFetchConfig("commits")
config.EarlyTermination = false // GitHub API already filters by since/until
return allCommits, nil
return FetchAllPagesWithEnrichment(ctx, c, cacheKey, config, fetcher, 10)
}
// mainBranches are the branches we consider as "main" branches
@@ -387,11 +492,9 @@ func (c *Client) FetchPullRequests(ctx context.Context, owner, repo string, sinc
// fetchPRsForBranch fetches merged PRs for a specific base branch
func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch string, since, until *time.Time) ([]models.PullRequest, error) {
var branchPRs []models.PullRequest
opts := &github.PullRequestListOptions{
State: "closed",
Base: baseBranch, // Filter by base branch at API level
Base: baseBranch,
Sort: "updated",
Direction: "desc",
ListOptions: github.ListOptions{
@@ -399,118 +502,76 @@ func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch
},
}
page := 1
consecutiveOldPages := 0
for {
var prs []*github.PullRequest
var resp *github.Response
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
var err error
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
return err
})
if err != nil {
return branchPRs, err
}
if page == 1 && len(prs) > 0 {
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
}
matchedInPage := 0
oldInPage := 0
for _, pr := range prs {
// Only consider merged PRs (check MergedAt since Merged field isn't in list response)
if pr.MergedAt == nil {
continue
fetcher := &DateFilteredFetcher[*github.PullRequest, models.PullRequest]{
FetchFn: func(ctx context.Context, page int) ([]*github.PullRequest, *github.Response, error) {
opts.Page = page
var prs []*github.PullRequest
var resp *github.Response
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
var err error
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
return err
})
if page == 1 && len(prs) > 0 {
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
}
// Use merge date for filtering
mergedAt := pr.MergedAt.Time
// Skip items newer than our range
if until != nil && mergedAt.After(*until) {
continue
return prs, resp, err
},
ConvertFn: func(pr *github.PullRequest) models.PullRequest {
return convertPullRequest(pr, owner, repo)
},
GetDateFn: func(pr *github.PullRequest) time.Time {
if pr.MergedAt != nil {
return pr.MergedAt.Time
}
// If older than our range, track it
if since != nil && mergedAt.Before(*since) {
oldInPage++
continue
}
mpr := convertPullRequest(pr, owner, repo)
branchPRs = append(branchPRs, mpr)
matchedInPage++
}
// Early termination: if we got a page with only old PRs (or empty), increment counter
if matchedInPage == 0 && oldInPage > 0 {
consecutiveOldPages++
// Stop after 2 consecutive pages of only old PRs
if consecutiveOldPages >= 2 {
break
}
} else {
consecutiveOldPages = 0
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
return time.Time{} // Will be filtered out by SkipFn
},
SkipFn: func(pr *github.PullRequest) bool {
// Only consider merged PRs
return pr.MergedAt == nil
},
Since: since,
Until: until,
}
return branchPRs, nil
config := FetchConfig{
ResourceName: "pull requests",
EarlyTermination: true,
EarlyTerminationThreshold: 2,
Quiet: true, // Parent function handles progress
}
return FetchAllPages(ctx, c, "", config, fetcher) // Empty cache key - parent handles caching
}
// FetchReviews fetches reviews for a specific pull request
func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber int) ([]models.Review, error) {
cacheKey := fmt.Sprintf("reviews:%s/%s:%d", owner, repo, prNumber)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if reviews, ok := cached.([]models.Review); ok {
return reviews, nil
}
}
var allReviews []models.Review
opts := &github.ListOptions{PerPage: 100}
for {
var reviews []*github.PullRequestReview
var resp *github.Response
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
var err error
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list reviews: %w", err)
}
for _, review := range reviews {
mr := convertReview(review, owner, repo, prNumber)
allReviews = append(allReviews, mr)
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
fetcher := &SimpleFetcher[*github.PullRequestReview, models.Review]{
FetchFn: func(ctx context.Context, page int) ([]*github.PullRequestReview, *github.Response, error) {
opts.Page = page
var reviews []*github.PullRequestReview
var resp *github.Response
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
var err error
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
return err
})
return reviews, resp, err
},
ConvertFn: func(review *github.PullRequestReview) models.Review {
return convertReview(review, owner, repo, prNumber)
},
}
// Cache results
c.cache.Set(cacheKey, allReviews)
config := DefaultFetchConfig("reviews")
config.EarlyTermination = false // Reviews don't need date-based early termination
config.Quiet = true // Suppress per-page progress (called many times in parallel)
return allReviews, nil
return FetchAllPages(ctx, c, cacheKey, config, fetcher)
}
// FetchIssues fetches issues from a repository
@@ -518,18 +579,6 @@ func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber
func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, error) {
cacheKey := fmt.Sprintf("issues:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if issues, ok := cached.([]models.Issue); ok {
c.progress(" Using cached issues data")
return issues, nil
}
}
var allIssues []models.Issue
// Sort by created date descending - newest first
// This allows us to stop early when we hit items older than our date range
opts := &github.IssueListByRepoOptions{
State: "all",
Sort: "created",
@@ -539,77 +588,33 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt
},
}
// Note: GitHub Issues API has a 'since' parameter but it filters by update time, not created time
// So we use our own filtering with early termination for better control
page := 1
reachedOldItems := false
for {
var issues []*github.Issue
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issues", func() error {
var err error
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list issues: %w", err)
}
c.progress(fmt.Sprintf(" Fetching issues page %d (%d issues so far)...", page, len(allIssues)))
oldItemsInPage := 0
totalNonPRItems := 0
for _, issue := range issues {
fetcher := &DateFilteredFetcher[*github.Issue, models.Issue]{
FetchFn: func(ctx context.Context, page int) ([]*github.Issue, *github.Response, error) {
opts.Page = page
var issues []*github.Issue
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issues", func() error {
var err error
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
return err
})
return issues, resp, err
},
ConvertFn: func(issue *github.Issue) models.Issue {
return convertIssue(issue, owner, repo)
},
GetDateFn: func(issue *github.Issue) time.Time {
return issue.GetCreatedAt().Time
},
SkipFn: func(issue *github.Issue) bool {
// Skip pull requests (they appear in issues API)
if issue.PullRequestLinks != nil {
continue
}
totalNonPRItems++
createdAt := issue.GetCreatedAt().Time
// Skip items newer than our range (when until is specified)
if until != nil && createdAt.After(*until) {
continue
}
// If we've gone past our date range (older than since), count it
if since != nil && createdAt.Before(*since) {
oldItemsInPage++
continue
}
mi := convertIssue(issue, owner, repo)
allIssues = append(allIssues, mi)
}
// If all non-PR items in this page are older than our range, we can stop
// (since results are sorted by created date descending)
if oldItemsInPage == totalNonPRItems && totalNonPRItems > 0 {
c.progress(fmt.Sprintf(" Reached issues older than date range, stopping early (page %d)", page))
reachedOldItems = true
break
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
return issue.PullRequestLinks != nil
},
Since: since,
Until: until,
}
if !reachedOldItems && page > 1 {
c.progress(fmt.Sprintf(" Fetched all %d pages of issues", page))
}
// Cache results
c.cache.Set(cacheKey, allIssues)
return allIssues, nil
return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issues"), fetcher)
}
// FetchIssueComments fetches comments on issues from a repository
@@ -617,18 +622,6 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt
func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.IssueComment, error) {
cacheKey := fmt.Sprintf("issue_comments:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if comments, ok := cached.([]models.IssueComment); ok {
c.progress(" Using cached issue comments data")
return comments, nil
}
}
var allComments []models.IssueComment
// Sort by created date descending - newest first
// This allows us to stop early when we hit items older than our date range
opts := &github.IssueListCommentsOptions{
Sort: github.Ptr("created"),
Direction: github.Ptr("desc"),
@@ -642,97 +635,29 @@ func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, sin
opts.Since = since
}
page := 1
reachedOldItems := false
for {
var comments []*github.IssueComment
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issue comments", func() error {
var err error
// Passing empty issue number fetches all comments in the repo
comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list issue comments: %w", err)
}
c.progress(fmt.Sprintf(" Fetching issue comments page %d (%d comments so far)...", page, len(allComments)))
oldItemsInPage := 0
totalItems := len(comments)
for _, comment := range comments {
createdAt := comment.GetCreatedAt().Time
// Skip items newer than our range (when until is specified)
if until != nil && createdAt.After(*until) {
continue
}
// If we've gone past our date range (older than since), count it
if since != nil && createdAt.Before(*since) {
oldItemsInPage++
continue
}
// Extract issue number from the issue URL
issueNumber := 0
if comment.IssueURL != nil {
// Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number}
parts := strings.Split(*comment.IssueURL, "/")
if len(parts) > 0 {
if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil {
issueNumber = num
}
}
}
var author models.Author
if comment.User != nil {
author = models.Author{
Login: comment.User.GetLogin(),
Name: comment.User.GetName(),
AvatarURL: comment.User.GetAvatarURL(),
}
}
ic := models.IssueComment{
ID: comment.GetID(),
Issue: issueNumber,
Repository: fmt.Sprintf("%s/%s", owner, repo),
Author: author,
Body: comment.GetBody(),
CreatedAt: createdAt,
}
allComments = append(allComments, ic)
}
// If all items in this page are older than our range, we can stop
// (since results are sorted by created date descending)
if oldItemsInPage == totalItems && totalItems > 0 {
c.progress(fmt.Sprintf(" Reached issue comments older than date range, stopping early (page %d)", page))
reachedOldItems = true
break
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
fetcher := &DateFilteredFetcher[*github.IssueComment, models.IssueComment]{
FetchFn: func(ctx context.Context, page int) ([]*github.IssueComment, *github.Response, error) {
opts.Page = page
var comments []*github.IssueComment
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issue comments", func() error {
var err error
comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts)
return err
})
return comments, resp, err
},
ConvertFn: func(comment *github.IssueComment) models.IssueComment {
return convertIssueComment(comment, owner, repo)
},
GetDateFn: func(comment *github.IssueComment) time.Time {
return comment.GetCreatedAt().Time
},
Since: since,
Until: until,
}
if !reachedOldItems && page > 1 {
c.progress(fmt.Sprintf(" Fetched all %d pages of issue comments", page))
}
// Cache results
c.cache.Set(cacheKey, allComments)
return allComments, nil
return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issue comments"), fetcher)
}
// UserProfile contains GitHub user profile information useful for deduplication
@@ -995,6 +920,38 @@ func convertReview(r *github.PullRequestReview, owner, repo string, prNumber int
}
}
// convertIssueComment maps a go-github issue comment onto the internal
// models.IssueComment and labels it with the "owner/repo" repository name.
//
// The issue number is parsed from the final path segment of the comment's
// IssueURL. It is left at 0 when the URL is missing or that segment is not an
// integer. The author is left at its zero value when the comment has no user.
func convertIssueComment(comment *github.IssueComment, owner, repo string) models.IssueComment {
	converted := models.IssueComment{
		ID:         comment.GetID(),
		Repository: fmt.Sprintf("%s/%s", owner, repo),
		Body:       comment.GetBody(),
		CreatedAt:  comment.GetCreatedAt().Time,
	}

	// Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number}
	if issueURL := comment.IssueURL; issueURL != nil {
		lastSegment := (*issueURL)[strings.LastIndex(*issueURL, "/")+1:]
		if number, err := strconv.Atoi(lastSegment); err == nil {
			converted.Issue = number
		}
	}

	if user := comment.User; user != nil {
		converted.Author = models.Author{
			Login:     user.GetLogin(),
			Name:      user.GetName(),
			AvatarURL: user.GetAvatarURL(),
		}
	}

	return converted
}
func convertIssue(i *github.Issue, owner, repo string) models.Issue {
var author models.Author
if i.User != nil {
+324
View File
@@ -0,0 +1,324 @@
package github
import (
"context"
"fmt"
"time"
"github.com/google/go-github/v68/github"
)
// DateFilterResult classifies a timestamp relative to an optional
// [since, until] date window.
type DateFilterResult int

const (
	// DateInclude means the timestamp lies inside the window.
	DateInclude DateFilterResult = iota
	// DateTooNew means the timestamp is after the 'until' bound.
	DateTooNew
	// DateTooOld means the timestamp is before the 'since' bound.
	DateTooOld
)

// FilterByDate reports where t falls relative to the window bounded by since
// and until. A nil bound is treated as open. Both bounds are inclusive: a
// timestamp equal to either bound yields DateInclude. The 'until' bound is
// checked first, so it wins if both bounds would reject t.
func FilterByDate(t time.Time, since, until *time.Time) DateFilterResult {
	switch {
	case until != nil && t.After(*until):
		return DateTooNew
	case since != nil && t.Before(*since):
		return DateTooOld
	default:
		return DateInclude
	}
}
// PageFetcher is a generic interface for fetching paginated resources.
// T is the raw item type returned by the API and R is the converted result
// type. FetchAllPages drives an implementation page by page: for every item
// it first consults ShouldSkip, then Filter, and calls Convert only for items
// that Filter reports as DateInclude.
type PageFetcher[T any, R any] interface {
// Fetch retrieves the given page of raw items together with the API
// response; FetchAllPages reads the next page number from resp.NextPage.
Fetch(ctx context.Context, page int) (items []T, resp *github.Response, err error)
// Convert transforms a raw item into the result type
Convert(item T) R
// Filter determines if an item should be included based on date range
// Returns DateInclude to include, DateTooNew/DateTooOld to exclude
Filter(item T) DateFilterResult
// ShouldSkip returns true if the item should be skipped entirely (e.g., PRs in issues list).
// Skipped items are not counted when FetchAllPages decides whether a whole
// page was too old.
ShouldSkip(item T) bool
}
// FetchConfig tunes how a paginated fetch reports progress and when it may
// stop before the last page.
type FetchConfig struct {
	// ResourceName is interpolated into progress and error messages
	// (e.g., "issues", "pull requests").
	ResourceName string
	// EarlyTermination allows the fetch to stop once whole pages contain
	// only items older than the requested range.
	EarlyTermination bool
	// EarlyTerminationThreshold is how many consecutive all-old pages must be
	// seen before the fetch stops.
	EarlyTerminationThreshold int
	// Quiet turns off per-page progress messages (useful for sub-fetches
	// such as reviews).
	Quiet bool
}

// DefaultFetchConfig returns the standard configuration for resourceName:
// early termination enabled after 2 consecutive old pages, progress output on.
func DefaultFetchConfig(resourceName string) FetchConfig {
	var cfg FetchConfig
	cfg.ResourceName = resourceName
	cfg.EarlyTermination = true
	cfg.EarlyTerminationThreshold = 2
	return cfg
}
// FetchAllPages walks every page offered by fetcher and returns the converted
// items that pass its skip and date filters.
//
// When cacheKey is non-empty, a cached []R stored under that key is returned
// immediately, and a freshly fetched result is stored under it before
// returning. An empty cacheKey disables both steps.
//
// With config.EarlyTermination set, the walk stops once
// config.EarlyTerminationThreshold consecutive pages contained at least one
// non-skipped item and all of them were DateTooOld. The walk also stops when
// the response reports no next page or when Fetch returns a nil response.
// Any Fetch error aborts the walk and is returned wrapped.
func FetchAllPages[T any, R any](
	ctx context.Context,
	c *Client,
	cacheKey string,
	config FetchConfig,
	fetcher PageFetcher[T, R],
) ([]R, error) {
	cacheable := cacheKey != ""

	// Serve from cache when possible.
	if cacheable {
		if hit, found := c.cache.Get(cacheKey); found {
			if typed, matches := hit.([]R); matches {
				c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName))
				return typed, nil
			}
		}
	}

	var collected []R
	staleStreak := 0
	pageNum := 1

	for {
		batch, resp, err := fetcher.Fetch(ctx, pageNum)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
		}

		// Without a response there is no pagination info to continue with.
		if resp == nil {
			break
		}

		if !config.Quiet {
			c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
				config.ResourceName, pageNum, len(collected), config.ResourceName))
		}

		eligible, stale := 0, 0
		for _, raw := range batch {
			// Items skipped outright (e.g., PRs in the issues API) do not
			// take part in the "whole page is old" decision.
			if fetcher.ShouldSkip(raw) {
				continue
			}
			eligible++

			verdict := fetcher.Filter(raw)
			if verdict == DateTooOld {
				stale++
			} else if verdict == DateInclude {
				collected = append(collected, fetcher.Convert(raw))
			}
		}

		// A page counts towards early termination only if it had eligible
		// items and every one of them was too old.
		pageAllStale := config.EarlyTermination && eligible > 0 && stale == eligible
		if pageAllStale {
			staleStreak++
		} else {
			staleStreak = 0
		}
		if pageAllStale && staleStreak >= config.EarlyTerminationThreshold {
			if !config.Quiet {
				c.progress(fmt.Sprintf(" Reached %s older than date range, stopping early (page %d)",
					config.ResourceName, pageNum))
			}
			break
		}

		if resp.NextPage == 0 {
			break
		}
		pageNum = resp.NextPage
	}

	// Remember the result for subsequent calls.
	if cacheable {
		c.cache.Set(cacheKey, collected)
	}

	return collected, nil
}
// SimpleFetcher adapts a pair of plain functions to the PageFetcher
// interface for resources that need neither date filtering nor skipping.
type SimpleFetcher[T any, R any] struct {
	// FetchFn retrieves one page of raw items.
	FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
	// ConvertFn turns a raw item into the result type.
	ConvertFn func(item T) R
}

// Fetch delegates to FetchFn.
func (f *SimpleFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
	items, resp, err := f.FetchFn(ctx, page)
	return items, resp, err
}

// Convert delegates to ConvertFn.
func (f *SimpleFetcher[T, R]) Convert(item T) R {
	converted := f.ConvertFn(item)
	return converted
}

// Filter accepts every item; a SimpleFetcher applies no date range.
func (f *SimpleFetcher[T, R]) Filter(_ T) DateFilterResult {
	return DateInclude
}

// ShouldSkip never skips an item.
func (f *SimpleFetcher[T, R]) ShouldSkip(_ T) bool {
	return false
}
// DateFilteredFetcher is a function-backed PageFetcher that additionally
// filters items by a timestamp and can skip items outright.
type DateFilteredFetcher[T any, R any] struct {
	// FetchFn retrieves one page of raw items.
	FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
	// ConvertFn turns a raw item into the result type.
	ConvertFn func(item T) R
	// GetDateFn extracts the timestamp that is compared against Since/Until.
	GetDateFn func(item T) time.Time
	// SkipFn is optional; when set, items it returns true for are skipped.
	SkipFn func(item T) bool
	// Since and Until bound the accepted range; nil leaves that side open.
	Since *time.Time
	Until *time.Time
}

// Fetch delegates to FetchFn.
func (f *DateFilteredFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
	items, resp, err := f.FetchFn(ctx, page)
	return items, resp, err
}

// Convert delegates to ConvertFn.
func (f *DateFilteredFetcher[T, R]) Convert(item T) R {
	converted := f.ConvertFn(item)
	return converted
}

// Filter classifies the item's timestamp (from GetDateFn) against Since/Until.
func (f *DateFilteredFetcher[T, R]) Filter(item T) DateFilterResult {
	stamp := f.GetDateFn(item)
	return FilterByDate(stamp, f.Since, f.Until)
}

// ShouldSkip reports SkipFn's verdict, or false when no SkipFn is configured.
func (f *DateFilteredFetcher[T, R]) ShouldSkip(item T) bool {
	return f.SkipFn != nil && f.SkipFn(item)
}
// WithRetry is the exported entry point to the client's retryWithBackoff
// helper. ctx, operation and fn are passed through unchanged and the helper's
// error is returned as is; the retry policy itself lives in retryWithBackoff.
func (c *Client) WithRetry(ctx context.Context, operation string, fn func() error) error {
return c.retryWithBackoff(ctx, operation, fn)
}
// EnrichingFetcher is a function-backed fetcher for resources where every
// accepted item needs extra per-item work (e.g., fetching commit details).
// It is consumed by FetchAllPagesWithEnrichment, which calls EnrichFn instead
// of Convert.
type EnrichingFetcher[T any, R any] struct {
	// FetchFn retrieves one page of raw items.
	FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
	// EnrichFn enriches and converts a raw item in one step.
	EnrichFn func(ctx context.Context, item T) (R, error)
	// GetDateFn extracts the timestamp that is compared against Since/Until.
	GetDateFn func(item T) time.Time
	// SkipFn is optional; when set, items it returns true for are skipped.
	SkipFn func(item T) bool
	// Since and Until bound the accepted range; nil leaves that side open.
	Since *time.Time
	Until *time.Time
}

// Fetch delegates to FetchFn.
func (f *EnrichingFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
	items, resp, err := f.FetchFn(ctx, page)
	return items, resp, err
}

// Convert only exists to satisfy PageFetcher and always yields the zero value
// of R; FetchAllPagesWithEnrichment goes through EnrichFn instead.
func (f *EnrichingFetcher[T, R]) Convert(_ T) R {
	return *new(R)
}

// Filter classifies the item's timestamp (from GetDateFn) against Since/Until.
func (f *EnrichingFetcher[T, R]) Filter(item T) DateFilterResult {
	stamp := f.GetDateFn(item)
	return FilterByDate(stamp, f.Since, f.Until)
}

// ShouldSkip reports SkipFn's verdict, or false when no SkipFn is configured.
func (f *EnrichingFetcher[T, R]) ShouldSkip(item T) bool {
	return f.SkipFn != nil && f.SkipFn(item)
}
// FetchAllPagesWithEnrichment is like FetchAllPages but calls EnrichFn for each item.
// This is useful when you need to make additional API calls per item (e.g., fetching commit details).
//
// When cacheKey is non-empty, a cached []R stored under that key is returned
// immediately and a completed result is stored under it before returning.
//
// Every page is visited until the response reports no next page (or Fetch
// returns a nil response); the early-termination fields of config are not
// consulted here. Items rejected by ShouldSkip or whose Filter result is not
// DateInclude are ignored.
//
// Enrichment is best-effort: an item whose EnrichFn fails is reported through
// the progress callback and left out of the result. The exception is a
// cancelled or expired ctx: in that case the fetch is aborted with an error
// instead of logging one warning per remaining item and then returning (and
// caching) a truncated result as if it were complete.
//
// progressEvery reports progress every N items of a page (0 = disabled).
func FetchAllPagesWithEnrichment[T any, R any](
	ctx context.Context,
	c *Client,
	cacheKey string,
	config FetchConfig,
	fetcher *EnrichingFetcher[T, R],
	progressEvery int, // Report progress every N items (0 = disabled)
) ([]R, error) {
	// Check cache first
	if cacheKey != "" {
		if cached, ok := c.cache.Get(cacheKey); ok {
			if results, ok := cached.([]R); ok {
				c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName))
				return results, nil
			}
		}
	}

	var allResults []R
	page := 1

	for {
		items, resp, err := fetcher.Fetch(ctx, page)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
		}

		// Safety check for nil response
		if resp == nil {
			break
		}

		if !config.Quiet {
			c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
				config.ResourceName, page, len(allResults), config.ResourceName))
		}

		for i, item := range items {
			// Skip items that should be filtered out entirely
			if fetcher.ShouldSkip(item) {
				continue
			}

			// Apply date filtering
			if fetcher.Filter(item) != DateInclude {
				continue
			}

			// Enrich the item (this may make additional API calls)
			enriched, err := fetcher.EnrichFn(ctx, item)
			if err != nil {
				// A dead context makes every further call fail too; stop
				// here so a partial result is never returned or cached.
				if ctxErr := ctx.Err(); ctxErr != nil {
					return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, ctxErr)
				}
				c.progress(fmt.Sprintf(" Warning: failed to enrich item: %v", err))
				continue
			}
			allResults = append(allResults, enriched)

			// Progress reporting (only reached for items that were enriched)
			if progressEvery > 0 && (i+1)%progressEvery == 0 {
				c.progress(fmt.Sprintf(" Processing item %d/%d on page %d...", i+1, len(items), page))
			}
		}

		if resp.NextPage == 0 {
			break
		}
		page = resp.NextPage
	}

	// Cache results
	if cacheKey != "" {
		c.cache.Set(cacheKey, allResults)
	}

	return allResults, nil
}
+573
View File
@@ -0,0 +1,573 @@
package github
import (
"context"
"fmt"
"io"
"os"
"strings"
"time"
"github.com/charmbracelet/bubbles/progress"
"github.com/charmbracelet/lipgloss"
"github.com/lukaszraczylo/git-velocity/internal/domain/models"
"github.com/shurcooL/githubv4"
"golang.org/x/oauth2"
)
// progressBar renders a single-line, carriage-return-refreshed progress
// indicator made of a label, a bubbles progress bar and a "current/total"
// counter.
type progressBar struct {
	progress progress.Model
	label    string
	total    int
	current  int
	out      io.Writer
}

// newProgressBar builds a 40-column bar with the default gradient that
// writes to standard error and starts at zero.
func newProgressBar(label string, total int) *progressBar {
	bar := &progressBar{
		label: label,
		total: total,
		out:   os.Stderr,
	}
	bar.progress = progress.New(
		progress.WithDefaultGradient(),
		progress.WithWidth(40),
	)
	return bar
}

// update records fetched as the current count and redraws the line in place.
// The displayed ratio is capped at 100% and is 0 when total is not positive,
// which also avoids dividing by zero.
func (p *progressBar) update(fetched int) {
	p.current = fetched

	ratio := 0.0
	if p.total > 0 {
		ratio = float64(p.current) / float64(p.total)
		if ratio > 1.0 {
			ratio = 1.0
		}
	}

	label := lipgloss.NewStyle().Foreground(lipgloss.Color("205")).Render(p.label)
	bar := p.progress.ViewAs(ratio)
	counter := lipgloss.NewStyle().Foreground(lipgloss.Color("241")).
		Render(fmt.Sprintf("%d/%d", p.current, p.total))

	fmt.Fprintf(p.out, "\r%s %s %s", label, bar, counter)
}

// done draws the bar as complete (current set to total) and ends the line.
func (p *progressBar) done() {
	p.update(p.total)
	fmt.Fprintln(p.out)
}
// GraphQLClient is a thin wrapper around a githubv4 client used for the
// GitHub GraphQL API.
type GraphQLClient struct {
	client *githubv4.Client
}

// NewGraphQLClient returns a GraphQLClient whose HTTP transport authenticates
// every request with the given static access token.
func NewGraphQLClient(token string) *GraphQLClient {
	tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	authedHTTP := oauth2.NewClient(context.Background(), tokenSource)
	return &GraphQLClient{client: githubv4.NewClient(authedHTTP)}
}
// PageInfo contains pagination info from GraphQL responses.
// fetchGQLPaginated keeps requesting pages while HasNextPage is true and
// passes EndCursor as the "cursor" variable of the next query.
type PageInfo struct {
HasNextPage bool
EndCursor githubv4.String
}
// PageResult represents a page of results from GraphQL in a query-independent
// shape: the connection's total count, its pagination info and the nodes of
// the current page.
type PageResult[T any] struct {
TotalCount int
PageInfo PageInfo
Nodes []T
}
// GQLFetchConfig configures the generic paginated fetcher for GraphQL.
// Q is the query struct type, T the node type of the paginated connection
// and R the result type produced per node.
type GQLFetchConfig[Q any, T any, R any] struct {
// Label is shown in front of the progress bar.
Label string
// Query is the struct the GraphQL response is decoded into; the same
// pointer is reused for every page.
Query *Q
// GetPageResult extracts the current page from the decoded query.
GetPageResult func(q *Q) PageResult[T]
// ProcessNode returns items, whether this node is "old" (outside date range),
// and whether to hard stop immediately (past cutoff date)
ProcessNode func(node T, repo string) (items []R, isOld bool, hardStop bool)
// ConsecutiveOldPagesToStop controls early termination (default: 2);
// a value of 0 selects the default.
ConsecutiveOldPagesToStop int
}
// fetchGQLPaginated is a generic paginated fetcher for GraphQL queries.
//
// It repeatedly runs config.Query with the variables "owner", "repo" and
// "cursor" (nil for the first page, then the previous page's EndCursor),
// feeds every node of each page to config.ProcessNode and collects the
// returned items.
//
// Pagination ends when any of the following holds:
//   - ProcessNode signals a hard stop (the rest of that page is not processed),
//   - config.ConsecutiveOldPagesToStop consecutive non-empty pages consisted
//     only of nodes flagged as old (0 selects the default of 2),
//   - the connection reports no further page.
//
// Each page query is attempted up to three times. Errors classified as
// transient by isGQLRetryableError are retried after 1s and then 2s; the
// wait is only performed when another attempt actually follows. A cancelled
// ctx during the wait returns ctx.Err(). Any other failure is returned
// wrapped as "graphql query failed".
func fetchGQLPaginated[Q any, T any, R any](
	ctx context.Context,
	client *githubv4.Client,
	owner, repo string,
	config GQLFetchConfig[Q, T, R],
) ([]R, error) {
	// maxAttempts bounds how often a single page query is issued.
	const maxAttempts = 3

	var allResults []R

	variables := map[string]interface{}{
		"owner":  githubv4.String(owner),
		"repo":   githubv4.String(repo),
		"cursor": (*githubv4.String)(nil),
	}

	var pbar *progressBar
	fetched := 0
	repoFullName := fmt.Sprintf("%s/%s", owner, repo)
	consecutiveOldPages := 0

	pagesToStop := config.ConsecutiveOldPagesToStop
	if pagesToStop == 0 {
		pagesToStop = 2 // default
	}

	for {
		// Retry logic for transient errors
		var queryErr error
		for attempt := 1; attempt <= maxAttempts; attempt++ {
			queryErr = client.Query(ctx, config.Query, variables)
			if queryErr == nil || !isGQLRetryableError(queryErr) {
				break
			}
			// Out of attempts: give up right away instead of announcing and
			// sleeping through a retry that will never be made.
			if attempt == maxAttempts {
				break
			}

			// Wait before retry with exponential backoff
			backoff := time.Duration(1<<(attempt-1)) * time.Second
			fmt.Fprintf(os.Stderr, "\r GraphQL retry %d/%d (waiting %s): %v\n",
				attempt, maxAttempts-1, backoff, queryErr)
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(backoff):
			}
		}
		if queryErr != nil {
			return nil, fmt.Errorf("graphql query failed: %w", queryErr)
		}

		page := config.GetPageResult(config.Query)

		// Initialize progress bar on first query
		if pbar == nil && page.TotalCount > 0 {
			pbar = newProgressBar(config.Label, page.TotalCount)
		}

		oldInPage := 0
		totalInPage := 0
		shouldHardStop := false

		for _, node := range page.Nodes {
			fetched++
			totalInPage++

			items, isOld, hardStop := config.ProcessNode(node, repoFullName)
			allResults = append(allResults, items...)

			if isOld {
				oldInPage++
			}
			if hardStop {
				shouldHardStop = true
				break
			}
		}

		if pbar != nil {
			pbar.update(fetched)
		}

		// Hard stop takes priority (past cutoff date)
		if shouldHardStop {
			break
		}

		// Track consecutive pages where all items are old
		if totalInPage > 0 && oldInPage == totalInPage {
			consecutiveOldPages++
		} else {
			consecutiveOldPages = 0
		}

		// Stop if we've seen enough consecutive old pages or no more pages
		if consecutiveOldPages >= pagesToStop || !page.PageInfo.HasNextPage {
			break
		}

		variables["cursor"] = githubv4.NewString(page.PageInfo.EndCursor)
	}

	// Every regular exit from the loop finishes the progress line.
	if pbar != nil {
		pbar.done()
	}

	return allResults, nil
}
// Query structs for PRs with reviews

// gqlPRQuery is the GraphQL query for one page of a repository's pull
// requests: up to 100 per page, restricted to the MERGED state and ordered by
// UPDATED_AT descending (note: not by merge time). It uses the $owner, $repo
// and $cursor variables.
type gqlPRQuery struct {
Repository struct {
PullRequests struct {
TotalCount int
PageInfo PageInfo
Nodes []gqlPRNode
} `graphql:"pullRequests(first: 100, after: $cursor, states: [MERGED], orderBy: {field: UPDATED_AT, direction: DESC})"`
} `graphql:"repository(owner: $owner, name: $repo)"`
}
// gqlPRNode is a single pull request as selected by gqlPRQuery, including
// the first 100 of its reviews.
type gqlPRNode struct {
Number int
Title string
State string
Merged bool
Additions int
Deletions int
ChangedFiles int
CreatedAt time.Time
UpdatedAt time.Time
MergedAt *time.Time
ClosedAt *time.Time
BaseRefName string
HeadRefName string
URL string
Commits struct{ TotalCount int }
Author gqlActor
Reviews struct {
TotalCount int
Nodes []gqlReviewNode
PageInfo PageInfo
} `graphql:"reviews(first: 100)"`
}
// gqlActor is the subset of a GraphQL actor used for authorship: login and
// avatar URL.
type gqlActor struct {
Login string
AvatarURL string `graphql:"avatarUrl"`
}
// gqlReviewNode is a single pull request review; only the number of its
// comments is selected, not the comments themselves.
type gqlReviewNode struct {
ID string `graphql:"id"`
Author gqlActor
State string
SubmittedAt *time.Time
Body string
Comments struct{ TotalCount int } `graphql:"comments"`
}
// Query struct for issues with comments

// gqlIssueQuery is the GraphQL query for one page of a repository's issues:
// up to 100 per page, ordered by CREATED_AT descending. It uses the $owner,
// $repo and $cursor variables.
type gqlIssueQuery struct {
Repository struct {
Issues struct {
TotalCount int
PageInfo PageInfo
Nodes []gqlIssueNode
} `graphql:"issues(first: 100, after: $cursor, orderBy: {field: CREATED_AT, direction: DESC})"`
} `graphql:"repository(owner: $owner, name: $repo)"`
}
// gqlIssueNode is a single issue as selected by gqlIssueQuery, including its
// first 10 labels and first 100 comments.
type gqlIssueNode struct {
Number int
Title string
State string
CreatedAt time.Time
UpdatedAt time.Time
ClosedAt *time.Time
URL string
Author gqlActor
Labels struct {
Nodes []struct{ Name string }
} `graphql:"labels(first: 10)"`
Comments struct {
TotalCount int
Nodes []gqlCommentNode
PageInfo PageInfo
} `graphql:"comments(first: 100)"`
}
// gqlCommentNode is a single issue comment.
type gqlCommentNode struct {
ID string `graphql:"id"`
Author gqlActor
Body string
CreatedAt time.Time
}
// prWithReviews bundles a PR with its reviews for the generic fetcher
type prWithReviews struct {
	PR      models.PullRequest
	Reviews []models.Review
}

// FetchPRsWithReviews fetches merged pull requests with their reviews using GraphQL.
//
// A pull request is returned when its merge time lies within [since, until]
// (a nil bound is open). Its reviews are returned alongside without date
// filtering. Only the first 100 reviews per pull request are requested; the
// reviews' own pagination info is not followed.
//
// The query lists pull requests by update time, newest first, not by merge
// time. Early termination therefore has to be decided on UpdatedAt: a pull
// request merged long ago but updated recently (e.g., by a late comment)
// sorts to the front, and judging it by MergedAt would stop the fetch before
// the in-range pull requests behind it were seen. This relies on a pull
// request's update time never being earlier than its merge time; under that
// assumption, once UpdatedAt falls before a bound, every later node was also
// merged before that bound.
//
// Returns the matching pull requests, the reviews of those pull requests, and
// any error from the paginated fetch.
func (g *GraphQLClient) FetchPRsWithReviews(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.PullRequest, []models.Review, error) {
	var query gqlPRQuery

	// Hard cutoff: 1 week before start date - stop fetching entirely past this point
	var hardCutoff *time.Time
	if since != nil {
		cutoff := since.AddDate(0, 0, -7)
		hardCutoff = &cutoff
	}

	results, err := fetchGQLPaginated(ctx, g.client, owner, repo, GQLFetchConfig[gqlPRQuery, gqlPRNode, prWithReviews]{
		Label:                     " Fetching PRs:",
		Query:                     &query,
		ConsecutiveOldPagesToStop: 2,
		GetPageResult: func(q *gqlPRQuery) PageResult[gqlPRNode] {
			return PageResult[gqlPRNode]{
				TotalCount: q.Repository.PullRequests.TotalCount,
				PageInfo:   q.Repository.PullRequests.PageInfo,
				Nodes:      q.Repository.PullRequests.Nodes,
			}
		},
		ProcessNode: func(node gqlPRNode, repoName string) ([]prWithReviews, bool, bool) {
			// Hard cutoff check on the sort key: everything after this node
			// was updated (and hence merged) even earlier.
			if hardCutoff != nil && node.UpdatedAt.Before(*hardCutoff) {
				return nil, true, true // Hard stop
			}

			// Skip if not merged - not counted as "old"
			if node.MergedAt == nil {
				return nil, false, false
			}
			mergedAt := *node.MergedAt

			// Check date range - skip if outside range
			if until != nil && mergedAt.After(*until) {
				return nil, false, false // Too new, not "old"
			}
			if since != nil && mergedAt.Before(*since) {
				// Merged too early to be included. It only counts towards
				// early termination when its update time is out of range as
				// well; otherwise in-range PRs may still follow.
				return nil, node.UpdatedAt.Before(*since), false
			}

			// Convert PR
			pr := convertPRNode(node, repoName)

			// Convert reviews
			var reviews []models.Review
			for _, r := range node.Reviews.Nodes {
				reviews = append(reviews, convertReviewNode(r, repoName, node.Number))
			}

			return []prWithReviews{{PR: pr, Reviews: reviews}}, false, false
		},
	})
	if err != nil {
		return nil, nil, err
	}

	// Flatten results
	var prs []models.PullRequest
	var reviews []models.Review
	for _, r := range results {
		prs = append(prs, r.PR)
		reviews = append(reviews, r.Reviews...)
	}

	return prs, reviews, nil
}
// issueWithComments pairs an issue with its in-range comments so both can
// travel through the generic fetcher as one result item.
type issueWithComments struct {
	Issue    models.Issue
	Comments []models.IssueComment
}

// FetchIssuesWithComments fetches issues together with their comments using
// GraphQL.
//
// An issue is returned when its creation time lies within [since, until]
// (a nil bound is open). Of such an issue's comments, only those created
// within the same range are returned; comments on issues outside the range
// are not collected. Fetching stops entirely at the first issue created more
// than one week before since, and also after 2 consecutive pages made up only
// of issues created before since.
func (g *GraphQLClient) FetchIssuesWithComments(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, []models.IssueComment, error) {
	// Hard cutoff: one week ahead of the range start.
	var hardCutoff *time.Time
	if since != nil {
		weekEarlier := since.AddDate(0, 0, -7)
		hardCutoff = &weekEarlier
	}

	// Range predicates shared by issues and comments.
	tooNew := func(t time.Time) bool { return until != nil && t.After(*until) }
	tooOld := func(t time.Time) bool { return since != nil && t.Before(*since) }

	var query gqlIssueQuery
	cfg := GQLFetchConfig[gqlIssueQuery, gqlIssueNode, issueWithComments]{
		Label:                     " Fetching issues:",
		Query:                     &query,
		ConsecutiveOldPagesToStop: 2,
	}
	cfg.GetPageResult = func(q *gqlIssueQuery) PageResult[gqlIssueNode] {
		conn := q.Repository.Issues
		return PageResult[gqlIssueNode]{
			TotalCount: conn.TotalCount,
			PageInfo:   conn.PageInfo,
			Nodes:      conn.Nodes,
		}
	}
	cfg.ProcessNode = func(node gqlIssueNode, repoName string) ([]issueWithComments, bool, bool) {
		created := node.CreatedAt
		switch {
		case hardCutoff != nil && created.Before(*hardCutoff):
			return nil, true, true // past the hard cutoff: stop entirely
		case tooNew(created):
			return nil, false, false // too new, does not count as "old"
		case tooOld(created):
			return nil, true, false // too old: feeds early-termination tracking
		}

		var kept []models.IssueComment
		for _, c := range node.Comments.Nodes {
			if tooNew(c.CreatedAt) || tooOld(c.CreatedAt) {
				continue
			}
			kept = append(kept, convertCommentNode(c, repoName, node.Number))
		}

		bundle := issueWithComments{
			Issue:    convertIssueNode(node, repoName),
			Comments: kept,
		}
		return []issueWithComments{bundle}, false, false
	}

	bundles, err := fetchGQLPaginated(ctx, g.client, owner, repo, cfg)
	if err != nil {
		return nil, nil, err
	}

	// Split the bundles back into two flat slices.
	var (
		issues   []models.Issue
		comments []models.IssueComment
	)
	for i := range bundles {
		issues = append(issues, bundles[i].Issue)
		comments = append(comments, bundles[i].Comments...)
	}

	return issues, comments, nil
}
// Conversion helpers

// convertActor copies the login and avatar URL of a GraphQL actor into a
// models.Author; all other author fields keep their zero value.
func convertActor(a gqlActor) models.Author {
	var author models.Author
	author.Login = a.Login
	author.AvatarURL = a.AvatarURL
	return author
}
// convertPRNode maps a GraphQL pull request node onto models.PullRequest.
// The state is merged when the node's Merged flag is set, closed when the
// node's State is "CLOSED", and open otherwise. The Comments field is filled
// from the review total count.
func convertPRNode(node gqlPRNode, repoName string) models.PullRequest {
	var state = models.PRStateOpen
	switch {
	case node.Merged:
		state = models.PRStateMerged
	case node.State == "CLOSED":
		state = models.PRStateClosed
	}

	pr := models.PullRequest{
		Number:     node.Number,
		Title:      node.Title,
		State:      state,
		Author:     convertActor(node.Author),
		Repository: repoName,
		URL:        node.URL,
	}

	// Branches.
	pr.BaseBranch = node.BaseRefName
	pr.HeadBranch = node.HeadRefName

	// Timestamps.
	pr.CreatedAt = node.CreatedAt
	pr.UpdatedAt = node.UpdatedAt
	pr.MergedAt = node.MergedAt
	pr.ClosedAt = node.ClosedAt

	// Size and activity counters.
	pr.Additions = node.Additions
	pr.Deletions = node.Deletions
	pr.FilesChanged = node.ChangedFiles
	pr.CommitCount = node.Commits.TotalCount
	pr.Comments = node.Reviews.TotalCount

	return pr
}
// convertReviewNode maps a GraphQL review node onto models.Review for the
// given repository and pull request number. SubmittedAt stays at the zero
// time when the node carries no submission timestamp.
func convertReviewNode(node gqlReviewNode, repoName string, prNumber int) models.Review {
	review := models.Review{
		PullRequest:   prNumber,
		Repository:    repoName,
		Author:        convertActor(node.Author),
		State:         models.ReviewState(node.State),
		Body:          node.Body,
		CommentsCount: node.Comments.TotalCount,
	}
	if ts := node.SubmittedAt; ts != nil {
		review.SubmittedAt = *ts
	}
	return review
}
// convertIssueNode maps a GraphQL issue node onto models.Issue. The state is
// closed when the node's State is "CLOSED" and open otherwise; Labels holds
// the label names in the order returned and remains nil when there are none.
func convertIssueNode(node gqlIssueNode, repoName string) models.Issue {
	issue := models.Issue{
		Number:     node.Number,
		Title:      node.Title,
		State:      models.IssueStateOpen,
		Author:     convertActor(node.Author),
		Repository: repoName,
		CreatedAt:  node.CreatedAt,
		UpdatedAt:  node.UpdatedAt,
		ClosedAt:   node.ClosedAt,
		Comments:   node.Comments.TotalCount,
		URL:        node.URL,
	}
	if node.State == "CLOSED" {
		issue.State = models.IssueStateClosed
	}

	var names []string
	for i := range node.Labels.Nodes {
		names = append(names, node.Labels.Nodes[i].Name)
	}
	issue.Labels = names

	return issue
}
// convertCommentNode maps a GraphQL issue comment node onto
// models.IssueComment for the given repository and issue number. The node's
// string ID is not carried over.
func convertCommentNode(node gqlCommentNode, repoName string, issueNumber int) models.IssueComment {
	var comment models.IssueComment
	comment.Issue = issueNumber
	comment.Repository = repoName
	comment.Author = convertActor(node.Author)
	comment.Body = node.Body
	comment.CreatedAt = node.CreatedAt
	return comment
}
// isGQLRetryableError reports whether err looks like a transient failure that
// is worth retrying. The decision is made purely on the lower-cased error
// text: it must contain one of a fixed list of markers (stream and connection
// failures, EOF, timeouts, and the 502/503/504 status codes). A nil error is
// never retryable.
func isGQLRetryableError(err error) bool {
	if err == nil {
		return false
	}

	message := strings.ToLower(err.Error())
	for _, marker := range [...]string{
		"stream error",
		"cancel",
		"eof",
		"connection reset",
		"connection refused",
		"timeout",
		"temporary failure",
		"broken pipe",
		"502",
		"503",
		"504",
	} {
		if strings.Contains(message, marker) {
			return true
		}
	}
	return false
}