Use GitHub GraphQL client as primary, fall back to REST client

This commit is contained in:
2025-12-11 23:35:53 +00:00
parent 5115551543
commit 03d1ef430a
8 changed files with 929 additions and 100 deletions
+109 -78
View File
@@ -204,7 +204,6 @@ func (a *App) collectRepoData(ctx context.Context, owner, name string, dateRange
if err != nil {
return fmt.Errorf("failed to fetch commits: %w", err)
}
a.log(" Found %d commits", len(commits))
// Filter out bots
for _, c := range commits {
@@ -213,87 +212,65 @@ func (a *App) collectRepoData(ctx context.Context, owner, name string, dateRange
}
}
// Fetch pull requests
prs, err := a.client.FetchPullRequests(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch pull requests: %w", err)
}
a.log(" Found %d pull requests", len(prs))
for _, pr := range prs {
if !a.config.IsBot(pr.Author.Login) {
data.PullRequests = append(data.PullRequests, pr)
}
}
// Fetch reviews in parallel for all PRs (already filtered by FetchPullRequests)
if len(prs) > 0 {
a.log(" Fetching reviews for %d PRs in parallel...", len(prs))
type reviewResult struct {
reviews []models.Review
err error
}
// Use worker pool to limit concurrent requests
concurrency := a.config.Options.ConcurrentRequests
if concurrency <= 0 {
concurrency = 5
}
results := make(chan reviewResult, len(prs))
sem := make(chan struct{}, concurrency)
for _, pr := range prs {
go func(prNum int) {
sem <- struct{}{} // Acquire
defer func() { <-sem }() // Release
reviews, err := a.client.FetchReviews(ctx, owner, name, prNum)
results <- reviewResult{reviews: reviews, err: err}
}(pr.Number)
}
// Collect results
reviewCount := 0
for i := 0; i < len(prs); i++ {
result := <-results
if result.err != nil {
continue
// Fetch pull requests and reviews
// Use GraphQL if available (much fewer API calls), otherwise fall back to REST
if a.client.HasGraphQL() {
prs, reviews, err := a.client.FetchPRsWithReviewsGraphQL(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
a.log(" Warning: GraphQL fetch failed, falling back to REST: %v", err)
// Fall back to REST
prs, reviews, err = a.fetchPRsAndReviewsREST(ctx, owner, name, dateRange, data)
if err != nil {
return err
}
for _, r := range result.reviews {
if !a.config.IsBot(r.Author.Login) {
data.Reviews = append(data.Reviews, r)
reviewCount++
}
// Filter out bots
for _, pr := range prs {
if !a.config.IsBot(pr.Author.Login) {
data.PullRequests = append(data.PullRequests, pr)
}
}
for _, r := range reviews {
if !a.config.IsBot(r.Author.Login) {
data.Reviews = append(data.Reviews, r)
}
}
} else {
// Use REST API
if _, _, err := a.fetchPRsAndReviewsREST(ctx, owner, name, dateRange, data); err != nil {
return err
}
}
// Fetch issues and comments
// Use GraphQL if available (much fewer API calls), otherwise fall back to REST
if a.client.HasGraphQL() {
issues, comments, err := a.client.FetchIssuesWithCommentsGraphQL(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
a.log(" Warning: GraphQL fetch failed, falling back to REST: %v", err)
// Fall back to REST
if err := a.fetchIssuesAndCommentsREST(ctx, owner, name, dateRange, data); err != nil {
return err
}
} else {
// Filter out bots
for _, issue := range issues {
if !a.config.IsBot(issue.Author.Login) {
data.Issues = append(data.Issues, issue)
}
}
for _, comment := range comments {
if !a.config.IsBot(comment.Author.Login) {
data.IssueComments = append(data.IssueComments, comment)
}
}
}
a.log(" Found %d reviews across %d PRs", reviewCount, len(prs))
}
// Fetch issues
issues, err := a.client.FetchIssues(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch issues: %w", err)
}
a.log(" Found %d issues", len(issues))
for _, issue := range issues {
if !a.config.IsBot(issue.Author.Login) {
data.Issues = append(data.Issues, issue)
}
}
// Fetch issue comments
issueComments, err := a.client.FetchIssueComments(ctx, owner, name, dateRange.Start, dateRange.End)
if err != nil {
return fmt.Errorf("failed to fetch issue comments: %w", err)
}
a.log(" Found %d issue comments", len(issueComments))
for _, comment := range issueComments {
if !a.config.IsBot(comment.Author.Login) {
data.IssueComments = append(data.IssueComments, comment)
} else {
// Use REST API
if err := a.fetchIssuesAndCommentsREST(ctx, owner, name, dateRange, data); err != nil {
return err
}
}
@@ -354,3 +331,57 @@ func (a *App) fetchUserProfiles(ctx context.Context, data *models.RawData) (map[
return profiles, nil
}
// fetchPRsAndReviewsREST fetches pull requests and their reviews using the
// REST API (used when GraphQL is unavailable or its fetch failed).
//
// Reviews are requested with bounded parallelism: at most
// Options.ConcurrentRequests calls to FetchReviews are in flight at once
// (5 when the option is unset or non-positive). The returned reviews are
// grouped in the same order as the returned PRs regardless of which request
// finishes first.
//
// A failure to fetch the PR list is returned as an error. A failure to fetch
// the reviews of a single PR is logged as a warning and that PR simply
// contributes no reviews.
//
// The data parameter is not read or written here: results are returned
// unfiltered, and the caller is responsible for bot filtering and for
// appending them to the RawData it is building.
func (a *App) fetchPRsAndReviewsREST(ctx context.Context, owner, name string, dateRange *config.ParsedDateRange, data *models.RawData) ([]models.PullRequest, []models.Review, error) {
	prs, err := a.client.FetchPullRequests(ctx, owner, name, dateRange.Start, dateRange.End)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to fetch pull requests: %w", err)
	}
	a.log(" Found %d pull requests", len(prs))

	concurrency := a.config.Options.ConcurrentRequests
	if concurrency <= 0 {
		concurrency = 5
	}

	type reviewResult struct {
		index   int // position of the PR in prs, used to restore ordering
		reviews []models.Review
		err     error
	}

	// results is buffered for every PR so workers never block on send, and
	// sem caps how many FetchReviews calls run at the same time.
	results := make(chan reviewResult, len(prs))
	sem := make(chan struct{}, concurrency)
	for i, pr := range prs {
		go func(index, prNum int) {
			select {
			case sem <- struct{}{}: // acquire a slot
			case <-ctx.Done():
				// Don't start new requests once the caller has given up.
				results <- reviewResult{index: index, err: ctx.Err()}
				return
			}
			defer func() { <-sem }() // release the slot

			prReviews, fetchErr := a.client.FetchReviews(ctx, owner, name, prNum)
			results <- reviewResult{index: index, reviews: prReviews, err: fetchErr}
		}(i, pr.Number)
	}

	// Collect exactly one result per PR. Logging happens only on this
	// goroutine so a.log is never called concurrently.
	perPR := make([][]models.Review, len(prs))
	for range prs {
		res := <-results
		if res.err != nil {
			a.log(" Warning: failed to fetch reviews for PR #%d: %v", prs[res.index].Number, res.err)
			continue
		}
		perPR[res.index] = res.reviews
	}

	var reviews []models.Review
	for _, prReviews := range perPR {
		reviews = append(reviews, prReviews...)
	}
	a.log(" Found %d reviews (REST)", len(reviews))
	return prs, reviews, nil
}
// fetchIssuesAndCommentsREST loads issues and issue comments for the date
// range through the REST client and appends every non-bot entry directly to
// data.Issues and data.IssueComments.
//
// Failing to fetch issues is an error. Failing to fetch comments is only
// logged as a warning: the issues collected so far are kept and nil is
// returned.
func (a *App) fetchIssuesAndCommentsREST(ctx context.Context, owner, name string, dateRange *config.ParsedDateRange, data *models.RawData) error {
	fetchedIssues, err := a.client.FetchIssues(ctx, owner, name, dateRange.Start, dateRange.End)
	if err != nil {
		return fmt.Errorf("failed to fetch issues: %w", err)
	}
	a.log(" Found %d issues", len(fetchedIssues))

	// Keep only issues opened by humans.
	for i := range fetchedIssues {
		if a.config.IsBot(fetchedIssues[i].Author.Login) {
			continue
		}
		data.Issues = append(data.Issues, fetchedIssues[i])
	}

	// Comments are fetched repository-wide for the same date range.
	fetchedComments, err := a.client.FetchIssueComments(ctx, owner, name, dateRange.Start, dateRange.End)
	if err != nil {
		// Best effort: missing comments should not discard the issues.
		a.log(" Warning: failed to fetch issue comments: %v", err)
		return nil
	}

	for i := range fetchedComments {
		if a.config.IsBot(fetchedComments[i].Author.Login) {
			continue
		}
		data.IssueComments = append(data.IssueComments, fetchedComments[i])
	}
	// The logged count is the number fetched, before bot filtering.
	a.log(" Found %d issue comments (REST)", len(fetchedComments))
	return nil
}
+14 -10
View File
@@ -152,6 +152,7 @@ type OptionsConfig struct {
UseLocalGit bool `yaml:"use_local_git"` // Use local git for commits (faster)
ShallowClone bool `yaml:"shallow_clone"` // Use shallow clone based on date range (faster cloning)
ShallowCloneBuffer int `yaml:"shallow_clone_buffer"` // Extra commits to fetch beyond date range (default: 100)
UseGraphQL bool `yaml:"use_graphql"` // Use GraphQL API for batched queries (fewer API calls)
UserAliases []UserAlias `yaml:"user_aliases,omitempty"` // Manual email/name to login mappings
}
@@ -159,16 +160,18 @@ type OptionsConfig struct {
// These cannot be overridden by users to ensure consistent bot filtering
func DefaultBotPatterns() []string {
	// Each pattern is listed exactly once. A repeated entry cannot match
	// anything the first occurrence does not, so duplicates only add work
	// and make the list harder to audit.
	return []string{
		"*[bot]",                   // GitHub App bots: dependabot[bot], renovate[bot], etc.
		"dependabot*",              // Dependabot variants
		"renovate*",                // Renovate bot variants
		"github-actions*",          // GitHub Actions
		"github-advanced-security", // GitHub Advanced Security
		"*-actions-runner",         // Self-hosted GitHub Actions runners
		"codecov*",                 // Codecov bot
		"snyk*",                    // Snyk security bot
		"greenkeeper*",             // Greenkeeper (legacy)
		"imgbot*",                  // Image optimization bot
		"allcontributors*",         // All Contributors bot
		"semantic-release*",        // Semantic release bot
	}
}
@@ -233,6 +236,7 @@ func DefaultConfig() *Config {
UseLocalGit: true, // Default to faster local git analysis
ShallowClone: true, // Default to shallow clone for faster cloning
ShallowCloneBuffer: 25, // Extra commits beyond date range for safety margin
UseGraphQL: true, // Default to GraphQL for fewer API calls
},
}
}
+109 -4
View File
@@ -3,12 +3,15 @@ package git
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/charmbracelet/bubbles/progress"
"github.com/charmbracelet/lipgloss"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
@@ -18,6 +21,55 @@ import (
"github.com/lukaszraczylo/git-velocity/internal/domain/models"
)
// commitProgressBar renders a single-line, in-place progress indicator for
// commit iteration. The number of commits is not known up front, so update
// shows a spinner with a running count and done replaces it with a full bar.
type commitProgressBar struct {
	progress progress.Model // bar model; rendered only by done, at 100%
	label    string         // text printed before the spinner/bar
	current  int            // last count passed to update
	frame    int            // number of update calls so far; selects the spinner glyph
	out      io.Writer      // destination for the rendered line
}

// newCommitProgressBar returns a progress indicator with the given label that
// writes to standard error.
func newCommitProgressBar(label string) *commitProgressBar {
	p := progress.New(
		progress.WithDefaultGradient(),
		progress.WithWidth(40),
	)
	return &commitProgressBar{
		progress: p,
		label:    label,
		current:  0,
		out:      os.Stderr,
	}
}

// update redraws the line with the next spinner glyph and the given commit
// count. The leading carriage return overwrites the previously drawn line.
func (p *commitProgressBar) update(count int) {
	p.current = count

	labelStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	countStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("241"))

	// Use a spinner-like display since we don't know total.
	// The glyph is chosen by how many times update has been called, not by
	// count: callers report progress at a fixed stride (every 10 commits),
	// and indexing ten frames by a multiple of ten always picks frame 0.
	spinner := [...]string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}
	spinChar := spinner[p.frame%len(spinner)]
	p.frame++

	fmt.Fprintf(p.out, "\r%s %s %s",
		labelStyle.Render(p.label),
		spinChar,
		countStyle.Render(fmt.Sprintf("%d commits", p.current)),
	)
}

// done overwrites the spinner line with a completed bar and the final commit
// total, then ends the line.
func (p *commitProgressBar) done(total int) {
	labelStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	countStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("241"))

	fmt.Fprintf(p.out, "\r%s %s %s\n",
		labelStyle.Render(p.label),
		p.progress.ViewAs(1.0),
		countStyle.Render(fmt.Sprintf("%d commits", total)),
	)
}
// ProgressCallback is called to report progress during git operations.
// It receives one already-formatted, human-readable status message per call.
type ProgressCallback func(message string)
@@ -158,8 +210,6 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil, fmt.Errorf("failed to open repository: %w", err)
}
r.progress(" Iterating commits with go-git...")
// Get all references to iterate all branches
refs, err := repo.References()
if err != nil {
@@ -171,6 +221,20 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
var commits []models.Commit
testPatterns := []string{"_test.go", ".test.", ".spec.", "/tests/", "/test/", "__tests__"}
// Progress bar for commit iteration
pbar := newCommitProgressBar(" Iterating commits:")
processedCount := 0
// Hard cutoff: 1 week before start date - stop iterating entirely past this point
var hardCutoff *time.Time
if since != nil {
cutoff := since.AddDate(0, 0, -7)
hardCutoff = &cutoff
}
// errStopIteration is used to signal early termination (not a real error)
var errStopIteration = fmt.Errorf("stop iteration")
err = refs.ForEach(func(ref *plumbing.Reference) error {
// Skip non-branch references
if !ref.Name().IsBranch() && !ref.Name().IsRemote() && !ref.Name().IsTag() {
@@ -188,6 +252,7 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil
}
consecutiveOld := 0
err = commitIter.ForEach(func(c *object.Commit) error {
// Check context cancellation
select {
@@ -201,13 +266,31 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil
}
seenCommits[c.Hash] = true
processedCount++
// Update progress every 10 commits to avoid too much I/O
if processedCount%10 == 0 {
pbar.update(processedCount)
}
commitTime := c.Author.When
// Hard cutoff - stop entirely if past this date
if hardCutoff != nil && commitTime.Before(*hardCutoff) {
return errStopIteration
}
// Filter by date range
if since != nil && commitTime.Before(*since) {
consecutiveOld++
// Early termination: if we've seen 100 consecutive old commits, stop this branch
if consecutiveOld >= 100 {
return errStopIteration
}
return nil
}
consecutiveOld = 0 // Reset counter when we find a valid commit
if until != nil && commitTime.After(*until) {
return nil
}
@@ -249,15 +332,27 @@ func (r *Repository) FetchCommits(ctx context.Context, owner, name string, since
return nil
})
// Handle expected termination conditions
if err == errStopIteration {
return nil // Not an error, just early termination for this branch
}
// Handle shallow clone boundary - "object not found" means we've reached
// the edge of the shallow clone history, which is expected behavior
if err != nil && isShallowBoundaryError(err) {
err = nil // Treat as normal end of history
}
return err
})
// Complete progress bar
pbar.done(len(commits))
if err != nil {
return nil, fmt.Errorf("failed to iterate commits: %w", err)
}
r.progress(fmt.Sprintf(" Found %d commits", len(commits)))
return commits, nil
}
@@ -372,6 +467,16 @@ func (r *Repository) getCommitStats(c *object.Commit, testPatterns []string) com
return stats
}
// isShallowBoundaryError reports whether err looks like the failure seen when
// history traversal runs off the end of a shallow clone. The check is a
// case-sensitive substring match on the error text; a nil error never matches.
func isShallowBoundaryError(err error) bool {
	// go-git returns "object not found" when trying to access commits beyond shallow depth
	const shallowMarker = "object not found"
	return err != nil && strings.Contains(err.Error(), shallowMarker)
}
// extractLoginFromEmail tries to extract GitHub login from email
func extractLoginFromEmail(email, fallbackName string) string {
// Pattern: 12345678+username@users.noreply.github.com
+77
View File
@@ -41,6 +41,7 @@ func DefaultRetryConfig() RetryConfig {
// Client wraps the GitHub API client with rate limiting and caching
type Client struct {
gh *github.Client
gql *GraphQLClient // GraphQL client for batched queries
config *config.Config
cache cache.Cache
retry RetryConfig
@@ -91,8 +92,15 @@ func NewClient(ctx context.Context, cfg *config.Config) (*Client, error) {
c = cache.NewNoopCache()
}
// Initialize GraphQL client if using token auth (GraphQL doesn't support GitHub App auth easily)
var gql *GraphQLClient
if cfg.HasGithubToken() && cfg.Options.UseGraphQL {
gql = NewGraphQLClient(cfg.Auth.GithubToken)
}
return &Client{
gh: gh,
gql: gql,
config: cfg,
cache: c,
retry: DefaultRetryConfig(),
@@ -107,6 +115,73 @@ func (c *Client) SetProgressCallback(cb ProgressCallback) {
}
}
// HasGraphQL reports whether this Client has a GraphQL client attached.
// The GraphQL fetch methods return an error when it is false, so callers
// should check it first and use the REST methods otherwise.
func (c *Client) HasGraphQL() bool {
	return c.gql != nil
}
// FetchPRsWithReviewsGraphQL returns the pull requests for owner/repo in the
// given date range together with their reviews, using the GraphQL client so
// that reviews arrive in the same requests as the PRs.
//
// It returns an error if no GraphQL client is configured or if the GraphQL
// fetch fails. Successful results are stored in the client cache under a key
// built from owner, repo, since and until, and are served from there on a hit.
func (c *Client) FetchPRsWithReviewsGraphQL(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.PullRequest, []models.Review, error) {
	// cachedData is the value stored in, and expected back from, the cache.
	type cachedData struct {
		PRs     []models.PullRequest
		Reviews []models.Review
	}

	if c.gql == nil {
		return nil, nil, fmt.Errorf("GraphQL client not initialized")
	}

	key := fmt.Sprintf("gql_prs_reviews:%s/%s:%v:%v", owner, repo, since, until)

	// Serve from cache when the stored entry has the expected type; anything
	// else is treated as a miss and refetched.
	if hit, found := c.cache.Get(key); found {
		if entry, valid := hit.(cachedData); valid {
			c.progress(" Using cached PRs and reviews data (GraphQL)")
			return entry.PRs, entry.Reviews, nil
		}
	}

	pullRequests, prReviews, err := c.gql.FetchPRsWithReviews(ctx, owner, repo, since, until)
	if err != nil {
		return nil, nil, err
	}

	c.cache.Set(key, cachedData{PRs: pullRequests, Reviews: prReviews})
	return pullRequests, prReviews, nil
}
// FetchIssuesWithCommentsGraphQL returns the issues for owner/repo in the
// given date range together with their comments, using the GraphQL client so
// that comments arrive in the same requests as the issues.
//
// It returns an error if no GraphQL client is configured or if the GraphQL
// fetch fails. Successful results are stored in the client cache under a key
// built from owner, repo, since and until, and are served from there on a hit.
func (c *Client) FetchIssuesWithCommentsGraphQL(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, []models.IssueComment, error) {
	// cachedData is the value stored in, and expected back from, the cache.
	type cachedData struct {
		Issues   []models.Issue
		Comments []models.IssueComment
	}

	if c.gql == nil {
		return nil, nil, fmt.Errorf("GraphQL client not initialized")
	}

	key := fmt.Sprintf("gql_issues_comments:%s/%s:%v:%v", owner, repo, since, until)

	// Serve from cache when the stored entry has the expected type; anything
	// else is treated as a miss and refetched.
	if hit, found := c.cache.Get(key); found {
		if entry, valid := hit.(cachedData); valid {
			c.progress(" Using cached issues and comments data (GraphQL)")
			return entry.Issues, entry.Comments, nil
		}
	}

	fetchedIssues, fetchedComments, err := c.gql.FetchIssuesWithComments(ctx, owner, repo, since, until)
	if err != nil {
		return nil, nil, err
	}

	c.cache.Set(key, cachedData{Issues: fetchedIssues, Comments: fetchedComments})
	return fetchedIssues, fetchedComments, nil
}
// SetRetryConfig sets the retry configuration
func (c *Client) SetRetryConfig(rc RetryConfig) {
c.retry = rc
@@ -459,6 +534,7 @@ func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch
ResourceName: "pull requests",
EarlyTermination: true,
EarlyTerminationThreshold: 2,
Quiet: true, // Parent function handles progress
}
return FetchAllPages(ctx, c, "", config, fetcher) // Empty cache key - parent handles caching
@@ -489,6 +565,7 @@ func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber
config := DefaultFetchConfig("reviews")
config.EarlyTermination = false // Reviews don't need date-based early termination
config.Quiet = true // Suppress per-page progress (called many times in parallel)
return FetchAllPages(ctx, c, cacheKey, config, fetcher)
}
+24 -6
View File
@@ -52,6 +52,8 @@ type FetchConfig struct {
EarlyTermination bool
// EarlyTerminationThreshold is the number of consecutive old pages before stopping
EarlyTerminationThreshold int
// Quiet suppresses per-page progress messages (useful for sub-fetches like reviews)
Quiet bool
}
// DefaultFetchConfig returns sensible defaults
@@ -91,8 +93,15 @@ func FetchAllPages[T any, R any](
return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
}
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
config.ResourceName, page, len(allResults), config.ResourceName))
// Safety check for nil response
if resp == nil {
break
}
if !config.Quiet {
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
config.ResourceName, page, len(allResults), config.ResourceName))
}
oldInPage := 0
totalEligible := 0
@@ -121,8 +130,10 @@ func FetchAllPages[T any, R any](
if config.EarlyTermination && totalEligible > 0 && oldInPage == totalEligible {
consecutiveOldPages++
if consecutiveOldPages >= config.EarlyTerminationThreshold {
c.progress(fmt.Sprintf(" Reached %s older than date range, stopping early (page %d)",
config.ResourceName, page))
if !config.Quiet {
c.progress(fmt.Sprintf(" Reached %s older than date range, stopping early (page %d)",
config.ResourceName, page))
}
break
}
} else {
@@ -260,8 +271,15 @@ func FetchAllPagesWithEnrichment[T any, R any](
return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
}
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
config.ResourceName, page, len(allResults), config.ResourceName))
// Safety check for nil response
if resp == nil {
break
}
if !config.Quiet {
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
config.ResourceName, page, len(allResults), config.ResourceName))
}
itemsInPage := 0
for i, item := range items {
+516
View File
@@ -0,0 +1,516 @@
package github
import (
"context"
"fmt"
"io"
"os"
"time"
"github.com/charmbracelet/bubbles/progress"
"github.com/charmbracelet/lipgloss"
"github.com/lukaszraczylo/git-velocity/internal/domain/models"
"github.com/shurcooL/githubv4"
"golang.org/x/oauth2"
)
// progressBar renders a single-line, in-place progress bar for a fetch whose
// total item count is known.
type progressBar struct {
	progress progress.Model // bar model used for rendering
	label    string         // text printed before the bar
	total    int            // expected number of items
	current  int            // last value passed to update
	out      io.Writer      // destination for the rendered line
}

// newProgressBar returns a bar with the given label and expected total that
// writes to standard error.
func newProgressBar(label string, total int) *progressBar {
	p := progress.New(
		progress.WithDefaultGradient(),
		progress.WithWidth(40),
	)
	return &progressBar{
		progress: p,
		label:    label,
		total:    total,
		current:  0,
		out:      os.Stderr,
	}
}

// update redraws the bar to show fetched out of total items. The leading
// carriage return overwrites the previously drawn line. The rendered fraction
// is always within [0, 1].
func (p *progressBar) update(fetched int) {
	p.current = fetched

	// With a non-positive total there is nothing to divide by (0/0 would be
	// NaN), so treat the bar as complete instead.
	percent := 1.0
	if p.total > 0 {
		percent = float64(p.current) / float64(p.total)
	}
	if percent > 1.0 {
		percent = 1.0
	}
	if percent < 0 {
		percent = 0
	}

	labelStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	countStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("241"))

	fmt.Fprintf(p.out, "\r%s %s %s",
		labelStyle.Render(p.label),
		p.progress.ViewAs(percent),
		countStyle.Render(fmt.Sprintf("%d/%d", p.current, p.total)),
	)
}

// done draws the bar as complete (current == total) and ends the line.
func (p *progressBar) done() {
	p.update(p.total)
	fmt.Fprintln(p.out)
}
// GraphQLClient is a thin wrapper around a githubv4 client used for the
// batched PR/review and issue/comment queries in this package.
type GraphQLClient struct {
	client *githubv4.Client
}

// NewGraphQLClient builds a GraphQLClient whose requests are authenticated
// with the given access token via an OAuth2 static token source.
func NewGraphQLClient(token string) *GraphQLClient {
	tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	authedHTTP := oauth2.NewClient(context.Background(), tokenSource)
	return &GraphQLClient{client: githubv4.NewClient(authedHTTP)}
}
// PageInfo contains pagination info from GraphQL responses.
// HasNextPage tells the paginated fetcher whether to request another page,
// and EndCursor is passed back as the "cursor" variable of the next query.
type PageInfo struct {
	HasNextPage bool
	EndCursor   githubv4.String
}
// PageResult represents a page of results from GraphQL in a query-independent
// shape, so the generic fetcher can work with any connection type.
type PageResult[T any] struct {
	TotalCount int      // connection-wide total; sizes the progress bar
	PageInfo   PageInfo // cursor and has-next-page flag for this page
	Nodes      []T      // items contained in this page
}
// GQLFetchConfig configures the generic paginated fetcher for GraphQL.
// Q is the query struct type, T the node type found in each page, and R the
// result type produced for each accepted node.
type GQLFetchConfig[Q any, T any, R any] struct {
	// Label is the text shown in front of the progress bar.
	Label string
	// Query is the struct the GraphQL client fills in on every page request.
	Query *Q
	// GetPageResult extracts the current page from the populated query struct.
	GetPageResult func(q *Q) PageResult[T]
	// ProcessNode returns items, whether this node is "old" (outside date range),
	// and whether to hard stop immediately (past cutoff date)
	ProcessNode func(node T, repo string) (items []R, isOld bool, hardStop bool)
	// ConsecutiveOldPagesToStop controls early termination (default: 2)
	ConsecutiveOldPagesToStop int
}
// fetchGQLPaginated runs config.Query page by page, feeding each node to
// config.ProcessNode and collecting the items it returns.
//
// Paging ends when any of the following happens:
//   - ProcessNode requests a hard stop (remaining nodes of that page are skipped);
//   - ConsecutiveOldPagesToStop pages in a row (default 2) consist solely of
//     nodes flagged as old;
//   - the server reports that there is no next page.
//
// A progress bar is shown once the first response reports a positive total
// count. A query failure aborts the fetch and returns a nil slice with the
// wrapped error.
func fetchGQLPaginated[Q any, T any, R any](
	ctx context.Context,
	client *githubv4.Client,
	owner, repo string,
	config GQLFetchConfig[Q, T, R],
) ([]R, error) {
	stopAfter := config.ConsecutiveOldPagesToStop
	if stopAfter == 0 {
		stopAfter = 2 // default
	}

	repoFullName := fmt.Sprintf("%s/%s", owner, repo)
	vars := map[string]any{
		"owner":  githubv4.String(owner),
		"repo":   githubv4.String(repo),
		"cursor": (*githubv4.String)(nil), // nil cursor requests the first page
	}

	var (
		collected  []R
		bar        *progressBar
		seenTotal  int // nodes visited across all pages
		oldPageRun int // current streak of all-old pages
	)

	for {
		if err := client.Query(ctx, config.Query, vars); err != nil {
			return nil, fmt.Errorf("graphql query failed: %w", err)
		}
		page := config.GetPageResult(config.Query)

		// The total is only known after the first response arrives.
		if bar == nil && page.TotalCount > 0 {
			bar = newProgressBar(config.Label, page.TotalCount)
		}

		seenInPage, oldInPage := 0, 0
		halted := false
		for _, node := range page.Nodes {
			seenInPage++
			items, isOld, hardStop := config.ProcessNode(node, repoFullName)
			collected = append(collected, items...)
			if isOld {
				oldInPage++
			}
			if hardStop {
				halted = true
				break
			}
		}
		seenTotal += seenInPage

		if bar != nil {
			bar.update(seenTotal)
		}

		// A hard stop ends the fetch before any streak bookkeeping.
		if halted {
			break
		}

		// Only a non-empty page made up entirely of old nodes extends the streak.
		if seenInPage > 0 && oldInPage == seenInPage {
			oldPageRun++
		} else {
			oldPageRun = 0
		}

		if oldPageRun >= stopAfter || !page.PageInfo.HasNextPage {
			break
		}

		vars["cursor"] = githubv4.NewString(page.PageInfo.EndCursor)
	}

	// Every non-error exit from the loop finishes the bar.
	if bar != nil {
		bar.done()
	}
	return collected, nil
}
// Query structs for PRs with reviews

// gqlPRQuery is the top-level query: one page of up to 100 merged pull
// requests for a repository, newest update first, starting after $cursor.
type gqlPRQuery struct {
	Repository struct {
		PullRequests struct {
			TotalCount int
			PageInfo   PageInfo
			Nodes      []gqlPRNode
		} `graphql:"pullRequests(first: 100, after: $cursor, states: [MERGED], orderBy: {field: UPDATED_AT, direction: DESC})"`
	} `graphql:"repository(owner: $owner, name: $repo)"`
}

// gqlPRNode is a single pull request as returned by gqlPRQuery, including the
// first 100 of its reviews.
// NOTE(review): Reviews.PageInfo is requested but nothing in this file follows
// it, so reviews beyond the first 100 of a PR appear not to be fetched — confirm.
type gqlPRNode struct {
	Number       int
	Title        string
	State        string
	Merged       bool
	Additions    int
	Deletions    int
	ChangedFiles int
	CreatedAt    time.Time
	UpdatedAt    time.Time
	MergedAt     *time.Time
	ClosedAt     *time.Time
	BaseRefName  string
	HeadRefName  string
	URL          string
	Commits      struct{ TotalCount int }
	Author       gqlActor
	Reviews      struct {
		TotalCount int
		Nodes      []gqlReviewNode
		PageInfo   PageInfo
	} `graphql:"reviews(first: 100)"`
}

// gqlActor is the author of a PR, review, issue or comment.
type gqlActor struct {
	Login     string
	AvatarURL string `graphql:"avatarUrl"`
}

// gqlReviewNode is a single review on a pull request. SubmittedAt is a
// pointer, so it may be nil; Comments carries only the number of review
// comments, not their content.
type gqlReviewNode struct {
	ID          string `graphql:"id"`
	Author      gqlActor
	State       string
	SubmittedAt *time.Time
	Body        string
	Comments    struct{ TotalCount int } `graphql:"comments"`
}
// Query struct for issues with comments

// gqlIssueQuery is the top-level query: one page of up to 100 issues for a
// repository, newest creation date first, starting after $cursor.
type gqlIssueQuery struct {
	Repository struct {
		Issues struct {
			TotalCount int
			PageInfo   PageInfo
			Nodes      []gqlIssueNode
		} `graphql:"issues(first: 100, after: $cursor, orderBy: {field: CREATED_AT, direction: DESC})"`
	} `graphql:"repository(owner: $owner, name: $repo)"`
}

// gqlIssueNode is a single issue as returned by gqlIssueQuery, including its
// first 10 labels and first 100 comments.
// NOTE(review): Comments.PageInfo is requested but nothing in this file follows
// it, so comments beyond the first 100 of an issue appear not to be fetched — confirm.
type gqlIssueNode struct {
	Number    int
	Title     string
	State     string
	CreatedAt time.Time
	UpdatedAt time.Time
	ClosedAt  *time.Time
	URL       string
	Author    gqlActor
	Labels    struct {
		Nodes []struct{ Name string }
	} `graphql:"labels(first: 10)"`
	Comments struct {
		TotalCount int
		Nodes      []gqlCommentNode
		PageInfo   PageInfo
	} `graphql:"comments(first: 100)"`
}

// gqlCommentNode is a single comment on an issue.
type gqlCommentNode struct {
	ID        string `graphql:"id"`
	Author    gqlActor
	Body      string
	CreatedAt time.Time
}
// prWithReviews bundles a PR with its reviews for the generic fetcher
type prWithReviews struct {
	PR      models.PullRequest
	Reviews []models.Review
}

// FetchPRsWithReviews fetches merged pull requests and their reviews using
// GraphQL. Only PRs whose merge time lies within [since, until] are returned;
// a nil bound leaves that side of the range open. Reviews are returned as one
// flat slice in the same order as their PRs.
//
// Pagination is ordered by update time, newest first, so both early-exit
// signals are derived from UpdatedAt, the sort field, rather than MergedAt. A PR
// merged long ago but touched recently (a new comment or label) sorts near
// the top; it is skipped by the merge-date filter but must not end the fetch,
// because in-range PRs can still follow it. Once a PR's last update is older
// than the range, every later PR is older still, and since a merge itself
// updates the PR, none of them can have been merged inside the range.
//
// NOTE(review): at most the first 100 reviews of each PR are included, since
// the query asks for reviews(first: 100) and review pages are not followed.
func (g *GraphQLClient) FetchPRsWithReviews(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.PullRequest, []models.Review, error) {
	var query gqlPRQuery

	// Hard cutoff: 1 week before start date - stop fetching entirely once
	// PRs were last updated before this point.
	var hardCutoff *time.Time
	if since != nil {
		cutoff := since.AddDate(0, 0, -7)
		hardCutoff = &cutoff
	}

	results, err := fetchGQLPaginated(ctx, g.client, owner, repo, GQLFetchConfig[gqlPRQuery, gqlPRNode, prWithReviews]{
		Label:                     " Fetching PRs:",
		Query:                     &query,
		ConsecutiveOldPagesToStop: 2,
		GetPageResult: func(q *gqlPRQuery) PageResult[gqlPRNode] {
			return PageResult[gqlPRNode]{
				TotalCount: q.Repository.PullRequests.TotalCount,
				PageInfo:   q.Repository.PullRequests.PageInfo,
				Nodes:      q.Repository.PullRequests.Nodes,
			}
		},
		ProcessNode: func(node gqlPRNode, repoName string) ([]prWithReviews, bool, bool) {
			// Termination checks use the sort field so they are monotonic
			// across the result stream.
			if hardCutoff != nil && node.UpdatedAt.Before(*hardCutoff) {
				return nil, true, true // Hard stop
			}
			if since != nil && node.UpdatedAt.Before(*since) {
				return nil, true, false // Too old - signal for early termination tracking
			}

			// Skip if not merged - not counted as "old"
			if node.MergedAt == nil {
				return nil, false, false
			}
			mergedAt := *node.MergedAt

			// Merge-date filter: out-of-range PRs are skipped without
			// influencing termination.
			if until != nil && mergedAt.After(*until) {
				return nil, false, false // Too new, not "old"
			}
			if since != nil && mergedAt.Before(*since) {
				return nil, false, false // Merged before the range but updated within it
			}

			// Convert PR
			pr := convertPRNode(node, repoName)

			// Convert reviews
			var reviews []models.Review
			for _, r := range node.Reviews.Nodes {
				reviews = append(reviews, convertReviewNode(r, repoName, node.Number))
			}

			return []prWithReviews{{PR: pr, Reviews: reviews}}, false, false
		},
	})
	if err != nil {
		return nil, nil, err
	}

	// Flatten results
	var prs []models.PullRequest
	var reviews []models.Review
	for _, r := range results {
		prs = append(prs, r.PR)
		reviews = append(reviews, r.Reviews...)
	}

	return prs, reviews, nil
}
// issueWithComments pairs one converted issue with the comments kept for it,
// so the generic fetcher can return both as a single result item.
type issueWithComments struct {
	Issue    models.Issue
	Comments []models.IssueComment
}

// FetchIssuesWithComments fetches issues and their comments using GraphQL.
// Only issues created within [since, until] are returned, and for those
// issues only comments created within the same range; a nil bound leaves that
// side of the range open. Comments are returned as one flat slice in the same
// order as their issues.
//
// Issues arrive newest-created first, so paging stops at the first issue
// created more than a week before since, or after two consecutive pages made
// up solely of issues created before since.
func (g *GraphQLClient) FetchIssuesWithComments(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, []models.IssueComment, error) {
	var query gqlIssueQuery

	// Hard cutoff: 1 week before start date - stop fetching entirely past this point
	var hardCutoff *time.Time
	if since != nil {
		weekBefore := since.AddDate(0, 0, -7)
		hardCutoff = &weekBefore
	}

	extractPage := func(q *gqlIssueQuery) PageResult[gqlIssueNode] {
		conn := q.Repository.Issues
		return PageResult[gqlIssueNode]{
			TotalCount: conn.TotalCount,
			PageInfo:   conn.PageInfo,
			Nodes:      conn.Nodes,
		}
	}

	handleNode := func(node gqlIssueNode, repoName string) ([]issueWithComments, bool, bool) {
		switch {
		case hardCutoff != nil && node.CreatedAt.Before(*hardCutoff):
			return nil, true, true // Hard stop
		case until != nil && node.CreatedAt.After(*until):
			return nil, false, false // Too new, not "old"
		case since != nil && node.CreatedAt.Before(*since):
			return nil, true, false // Too old - signal for early termination tracking
		}

		converted := convertIssueNode(node, repoName)

		// Keep only the comments that fall inside the date range.
		var kept []models.IssueComment
		for _, c := range node.Comments.Nodes {
			tooNew := until != nil && c.CreatedAt.After(*until)
			tooOld := since != nil && c.CreatedAt.Before(*since)
			if tooNew || tooOld {
				continue
			}
			kept = append(kept, convertCommentNode(c, repoName, node.Number))
		}

		return []issueWithComments{{Issue: converted, Comments: kept}}, false, false
	}

	bundles, err := fetchGQLPaginated(ctx, g.client, owner, repo, GQLFetchConfig[gqlIssueQuery, gqlIssueNode, issueWithComments]{
		Label:                     " Fetching issues:",
		Query:                     &query,
		ConsecutiveOldPagesToStop: 2,
		GetPageResult:             extractPage,
		ProcessNode:               handleNode,
	})
	if err != nil {
		return nil, nil, err
	}

	// Split the bundles back into two flat slices.
	var (
		issues   []models.Issue
		comments []models.IssueComment
	)
	for i := range bundles {
		issues = append(issues, bundles[i].Issue)
		comments = append(comments, bundles[i].Comments...)
	}

	return issues, comments, nil
}
// Conversion helpers

// convertActor maps a GraphQL actor to the domain Author, copying the login
// and avatar URL.
func convertActor(a gqlActor) models.Author {
	var author models.Author
	author.Login = a.Login
	author.AvatarURL = a.AvatarURL
	return author
}
// convertPRNode maps a GraphQL pull request node to the domain PullRequest
// for the repository named repoName.
//
// State is merged when the node is flagged as merged, closed when the raw
// state is "CLOSED", and open otherwise.
// NOTE(review): Comments is filled from the PR's review count
// (Reviews.TotalCount); confirm that is the intended meaning of the field.
func convertPRNode(node gqlPRNode, repoName string) models.PullRequest {
	prState := models.PRStateOpen
	switch {
	case node.Merged:
		prState = models.PRStateMerged
	case node.State == "CLOSED":
		prState = models.PRStateClosed
	}

	converted := models.PullRequest{
		Number:       node.Number,
		Title:        node.Title,
		State:        prState,
		Author:       convertActor(node.Author),
		Repository:   repoName,
		BaseBranch:   node.BaseRefName,
		HeadBranch:   node.HeadRefName,
		CreatedAt:    node.CreatedAt,
		UpdatedAt:    node.UpdatedAt,
		MergedAt:     node.MergedAt,
		ClosedAt:     node.ClosedAt,
		Additions:    node.Additions,
		Deletions:    node.Deletions,
		FilesChanged: node.ChangedFiles,
		CommitCount:  node.Commits.TotalCount,
		Comments:     node.Reviews.TotalCount,
		URL:          node.URL,
	}
	return converted
}
// convertReviewNode maps a GraphQL review node to the domain Review for pull
// request prNumber in repoName. A review without a submission time keeps the
// zero time in SubmittedAt.
func convertReviewNode(node gqlReviewNode, repoName string, prNumber int) models.Review {
	review := models.Review{
		PullRequest:   prNumber,
		Repository:    repoName,
		Author:        convertActor(node.Author),
		State:         models.ReviewState(node.State),
		Body:          node.Body,
		CommentsCount: node.Comments.TotalCount,
	}
	if ts := node.SubmittedAt; ts != nil {
		review.SubmittedAt = *ts
	}
	return review
}
// convertIssueNode maps a GraphQL issue node to the domain Issue for the
// repository named repoName. State is closed when the raw state is "CLOSED"
// and open otherwise; Labels holds the label names in response order and
// stays nil when the issue has none.
func convertIssueNode(node gqlIssueNode, repoName string) models.Issue {
	issueState := models.IssueStateOpen
	if node.State == "CLOSED" {
		issueState = models.IssueStateClosed
	}

	var names []string
	if n := len(node.Labels.Nodes); n > 0 {
		names = make([]string, 0, n)
		for i := range node.Labels.Nodes {
			names = append(names, node.Labels.Nodes[i].Name)
		}
	}

	converted := models.Issue{
		Number:     node.Number,
		Title:      node.Title,
		State:      issueState,
		Author:     convertActor(node.Author),
		Repository: repoName,
		CreatedAt:  node.CreatedAt,
		UpdatedAt:  node.UpdatedAt,
		ClosedAt:   node.ClosedAt,
		Comments:   node.Comments.TotalCount,
		Labels:     names,
		URL:        node.URL,
	}
	return converted
}
// convertCommentNode maps a GraphQL comment node to the domain IssueComment
// attached to issue issueNumber in repoName.
func convertCommentNode(node gqlCommentNode, repoName string, issueNumber int) models.IssueComment {
	var comment models.IssueComment
	comment.Issue = issueNumber
	comment.Repository = repoName
	comment.Author = convertActor(node.Author)
	comment.Body = node.Body
	comment.CreatedAt = node.CreatedAt
	return comment
}