From 5115551543378e805ed663b800e3878703ab05e4 Mon Sep 17 00:00:00 2001 From: Lukasz Raczylo Date: Thu, 11 Dec 2025 22:12:17 +0000 Subject: [PATCH] Abstract git operations into generics. --- internal/github/client.go | 485 ++++++++++++------------------------- internal/github/fetcher.go | 306 +++++++++++++++++++++++ 2 files changed, 463 insertions(+), 328 deletions(-) create mode 100644 internal/github/fetcher.go diff --git a/internal/github/client.go b/internal/github/client.go index f4a6ced..5896d08 100644 --- a/internal/github/client.go +++ b/internal/github/client.go @@ -323,16 +323,6 @@ func (c *Client) GetCommitCountSince(ctx context.Context, owner, repo string, si func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) { cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until) - // Check cache - if cached, ok := c.cache.Get(cacheKey); ok { - if commits, ok := cached.([]models.Commit); ok { - c.progress(" Using cached commits data") - return commits, nil - } - } - - var allCommits []models.Commit - opts := &github.CommitsListOptions{ ListOptions: github.ListOptions{PerPage: 100}, } @@ -344,23 +334,19 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un opts.Until = *until } - page := 1 - for { - var commits []*github.RepositoryCommit - var resp *github.Response - - err := c.retryWithBackoff(ctx, "list commits", func() error { - var err error - commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts) - return err - }) - if err != nil { - return nil, fmt.Errorf("failed to list commits: %w", err) - } - - c.progress(fmt.Sprintf(" Fetching commits page %d (%d commits so far)...", page, len(allCommits))) - - for i, commit := range commits { + fetcher := &EnrichingFetcher[*github.RepositoryCommit, models.Commit]{ + FetchFn: func(ctx context.Context, page int) ([]*github.RepositoryCommit, *github.Response, error) { + opts.Page = page + var commits []*github.RepositoryCommit + var resp *github.Response + err := c.retryWithBackoff(ctx, "list commits", func() error { + var err error + commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts) + return err + }) + return commits, resp, err + }, + EnrichFn: func(ctx context.Context, commit *github.RepositoryCommit) (models.Commit, error) { // Fetch detailed commit info for stats var detailed *github.RepositoryCommit err := c.retryWithBackoff(ctx, fmt.Sprintf("get commit %s", commit.GetSHA()[:7]), func() error { @@ -369,31 +355,24 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un return err }) if err != nil { - // Log and continue - we can still use basic info - c.progress(fmt.Sprintf(" Warning: failed to get commit details for %s: %v", commit.GetSHA()[:7], err)) - continue + return models.Commit{}, err } - - mc := convertCommit(detailed, owner, repo) - allCommits = append(allCommits, mc) - - // Progress every 10 commits - if (i+1)%10 == 0 { - c.progress(fmt.Sprintf(" Processing commit %d/%d on page %d...", i+1, len(commits), page)) + return convertCommit(detailed, owner, repo), nil + }, + GetDateFn: func(commit *github.RepositoryCommit) time.Time { + if commit.Commit != nil && commit.Commit.Author != nil { + return commit.Commit.Author.GetDate().Time } - } - - if resp.NextPage == 0 { - break - } - opts.Page = resp.NextPage - page++ + return time.Time{} + }, + Since: since, + Until: until, } - // Cache results - c.cache.Set(cacheKey, allCommits) + config := DefaultFetchConfig("commits") + config.EarlyTermination = false // GitHub API already filters by since/until - return allCommits, nil + return FetchAllPagesWithEnrichment(ctx, c, cacheKey, config, fetcher, 10) } // mainBranches are the branches we consider as "main" branches @@ -434,11 +413,9 @@ func (c *Client) FetchPullRequests(ctx context.Context, owner, repo string, sinc // fetchPRsForBranch fetches merged PRs for a specific base branch func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch string, since, until *time.Time) ([]models.PullRequest, error) { - var branchPRs []models.PullRequest - opts := &github.PullRequestListOptions{ State: "closed", - Base: baseBranch, // Filter by base branch at API level + Base: baseBranch, Sort: "updated", Direction: "desc", ListOptions: github.ListOptions{ @@ -446,118 +423,74 @@ func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch }, } - page := 1 - consecutiveOldPages := 0 - - for { - var prs []*github.PullRequest - var resp *github.Response - - err := c.retryWithBackoff(ctx, "list pull requests", func() error { - var err error - prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts) - return err - }) - if err != nil { - return branchPRs, err - } - - if page == 1 && len(prs) > 0 { - c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch)) - } - - matchedInPage := 0 - oldInPage := 0 - - for _, pr := range prs { - // Only consider merged PRs (check MergedAt since Merged field isn't in list response) - if pr.MergedAt == nil { - continue + fetcher := &DateFilteredFetcher[*github.PullRequest, models.PullRequest]{ + FetchFn: func(ctx context.Context, page int) ([]*github.PullRequest, *github.Response, error) { + opts.Page = page + var prs []*github.PullRequest + var resp *github.Response + err := c.retryWithBackoff(ctx, "list pull requests", func() error { + var err error + prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts) + return err + }) + if page == 1 && len(prs) > 0 { + c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch)) } - - // Use merge date for filtering - mergedAt := pr.MergedAt.Time - - // Skip items newer than our range - if until != nil && mergedAt.After(*until) { - continue + return prs, resp, err + }, + ConvertFn: func(pr *github.PullRequest) models.PullRequest { + return convertPullRequest(pr, owner, repo) + }, + GetDateFn: func(pr *github.PullRequest) time.Time { + if pr.MergedAt != nil { + return pr.MergedAt.Time } - - // If older than our range, track it - if since != nil && mergedAt.Before(*since) { - oldInPage++ - continue - } - - mpr := convertPullRequest(pr, owner, repo) - branchPRs = append(branchPRs, mpr) - matchedInPage++ - } - - // Early termination: if we got a page with only old PRs (or empty), increment counter - if matchedInPage == 0 && oldInPage > 0 { - consecutiveOldPages++ - // Stop after 2 consecutive pages of only old PRs - if consecutiveOldPages >= 2 { - break - } - } else { - consecutiveOldPages = 0 - } - - if resp.NextPage == 0 { - break - } - opts.Page = resp.NextPage - page++ + return time.Time{} // Will be filtered out by SkipFn + }, + SkipFn: func(pr *github.PullRequest) bool { + // Only consider merged PRs + return pr.MergedAt == nil + }, + Since: since, + Until: until, } - return branchPRs, nil + config := FetchConfig{ + ResourceName: "pull requests", + EarlyTermination: true, + EarlyTerminationThreshold: 2, + } + + return FetchAllPages(ctx, c, "", config, fetcher) // Empty cache key - parent handles caching } // FetchReviews fetches reviews for a specific pull request func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber int) ([]models.Review, error) { cacheKey := fmt.Sprintf("reviews:%s/%s:%d", owner, repo, prNumber) - // Check cache - if cached, ok := c.cache.Get(cacheKey); ok { - if reviews, ok := cached.([]models.Review); ok { - return reviews, nil - } - } - - var allReviews []models.Review - opts := &github.ListOptions{PerPage: 100} - for { - var reviews []*github.PullRequestReview - var resp *github.Response - - err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error { - var err error - reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts) - return err - }) - if err != nil { - return nil, fmt.Errorf("failed to list reviews: %w", err) - } - - for _, review := range reviews { - mr := convertReview(review, owner, repo, prNumber) - allReviews = append(allReviews, mr) - } - - if resp.NextPage == 0 { - break - } - opts.Page = resp.NextPage + fetcher := &SimpleFetcher[*github.PullRequestReview, models.Review]{ + FetchFn: func(ctx context.Context, page int) ([]*github.PullRequestReview, *github.Response, error) { + opts.Page = page + var reviews []*github.PullRequestReview + var resp *github.Response + err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error { + var err error + reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts) + return err + }) + return reviews, resp, err + }, + ConvertFn: func(review *github.PullRequestReview) models.Review { + return convertReview(review, owner, repo, prNumber) + }, } - // Cache results - c.cache.Set(cacheKey, allReviews) + config := DefaultFetchConfig("reviews") + config.EarlyTermination = false // Reviews don't need date-based early termination - return allReviews, nil + return FetchAllPages(ctx, c, cacheKey, config, fetcher) } // FetchIssues fetches issues from a repository @@ -565,18 +498,6 @@ func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, error) { cacheKey := fmt.Sprintf("issues:%s/%s:%v:%v", owner, repo, since, until) - // Check cache - if cached, ok := c.cache.Get(cacheKey); ok { - if issues, ok := cached.([]models.Issue); ok { - c.progress(" Using cached issues data") - return issues, nil - } - } - - var allIssues []models.Issue - - // Sort by created date descending - newest first - // This allows us to stop early when we hit items older than our date range opts := &github.IssueListByRepoOptions{ State: "all", Sort: "created", @@ -586,77 +507,33 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt }, } - // Note: GitHub Issues API has a 'since' parameter but it filters by update time, not created time - // So we use our own filtering with early termination for better control - - page := 1 - reachedOldItems := false - - for { - var issues []*github.Issue - var resp *github.Response - - err := c.retryWithBackoff(ctx, "list issues", func() error { - var err error - issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts) - return err - }) - if err != nil { - return nil, fmt.Errorf("failed to list issues: %w", err) - } - - c.progress(fmt.Sprintf(" Fetching issues page %d (%d issues so far)...", page, len(allIssues))) - - oldItemsInPage := 0 - totalNonPRItems := 0 - - for _, issue := range issues { + fetcher := &DateFilteredFetcher[*github.Issue, models.Issue]{ + FetchFn: func(ctx context.Context, page int) ([]*github.Issue, *github.Response, error) { + opts.Page = page + var issues []*github.Issue + var resp *github.Response + err := c.retryWithBackoff(ctx, "list issues", func() error { + var err error + issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts) + return err + }) + return issues, resp, err + }, + ConvertFn: func(issue *github.Issue) models.Issue { + return convertIssue(issue, owner, repo) + }, + GetDateFn: func(issue *github.Issue) time.Time { + return issue.GetCreatedAt().Time + }, + SkipFn: func(issue *github.Issue) bool { // Skip pull requests (they appear in issues API) - if issue.PullRequestLinks != nil { - continue - } - - totalNonPRItems++ - createdAt := issue.GetCreatedAt().Time - - // Skip items newer than our range (when until is specified) - if until != nil && createdAt.After(*until) { - continue - } - - // If we've gone past our date range (older than since), count it - if since != nil && createdAt.Before(*since) { - oldItemsInPage++ - continue - } - - mi := convertIssue(issue, owner, repo) - allIssues = append(allIssues, mi) - } - - // If all non-PR items in this page are older than our range, we can stop - // (since results are sorted by created date descending) - if oldItemsInPage == totalNonPRItems && totalNonPRItems > 0 { - c.progress(fmt.Sprintf(" Reached issues older than date range, stopping early (page %d)", page)) - reachedOldItems = true - break - } - - if resp.NextPage == 0 { - break - } - opts.Page = resp.NextPage - page++ + return issue.PullRequestLinks != nil + }, + Since: since, + Until: until, } - if !reachedOldItems && page > 1 { - c.progress(fmt.Sprintf(" Fetched all %d pages of issues", page)) - } - - // Cache results - c.cache.Set(cacheKey, allIssues) - - return allIssues, nil + return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issues"), fetcher) } // FetchIssueComments fetches comments on issues from a repository @@ -664,18 +541,6 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.IssueComment, error) { cacheKey := fmt.Sprintf("issue_comments:%s/%s:%v:%v", owner, repo, since, until) - // Check cache - if cached, ok := c.cache.Get(cacheKey); ok { - if comments, ok := cached.([]models.IssueComment); ok { - c.progress(" Using cached issue comments data") - return comments, nil - } - } - - var allComments []models.IssueComment - - // Sort by created date descending - newest first - // This allows us to stop early when we hit items older than our date range opts := &github.IssueListCommentsOptions{ Sort: github.Ptr("created"), Direction: github.Ptr("desc"), @@ -689,97 +554,29 @@ func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, sin opts.Since = since } - page := 1 - reachedOldItems := false - - for { - var comments []*github.IssueComment - var resp *github.Response - - err := c.retryWithBackoff(ctx, "list issue comments", func() error { - var err error - // Passing empty issue number fetches all comments in the repo - comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts) - return err - }) - if err != nil { - return nil, fmt.Errorf("failed to list issue comments: %w", err) - } - - c.progress(fmt.Sprintf(" Fetching issue comments page %d (%d comments so far)...", page, len(allComments))) - - oldItemsInPage := 0 - totalItems := len(comments) - - for _, comment := range comments { - createdAt := comment.GetCreatedAt().Time - - // Skip items newer than our range (when until is specified) - if until != nil && createdAt.After(*until) { - continue - } - - // If we've gone past our date range (older than since), count it - if since != nil && createdAt.Before(*since) { - oldItemsInPage++ - continue - } - - // Extract issue number from the issue URL - issueNumber := 0 - if comment.IssueURL != nil { - // Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number} - parts := strings.Split(*comment.IssueURL, "/") - if len(parts) > 0 { - if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil { - issueNumber = num - } - } - } - - var author models.Author - if comment.User != nil { - author = models.Author{ - Login: comment.User.GetLogin(), - Name: comment.User.GetName(), - AvatarURL: comment.User.GetAvatarURL(), - } - } - - ic := models.IssueComment{ - ID: comment.GetID(), - Issue: issueNumber, - Repository: fmt.Sprintf("%s/%s", owner, repo), - Author: author, - Body: comment.GetBody(), - CreatedAt: createdAt, - } - allComments = append(allComments, ic) - } - - // If all items in this page are older than our range, we can stop - // (since results are sorted by created date descending) - if oldItemsInPage == totalItems && totalItems > 0 { - c.progress(fmt.Sprintf(" Reached issue comments older than date range, stopping early (page %d)", page)) - reachedOldItems = true - break - } - - if resp.NextPage == 0 { - break - } - opts.Page = resp.NextPage - page++ + fetcher := &DateFilteredFetcher[*github.IssueComment, models.IssueComment]{ + FetchFn: func(ctx context.Context, page int) ([]*github.IssueComment, *github.Response, error) { + opts.Page = page + var comments []*github.IssueComment + var resp *github.Response + err := c.retryWithBackoff(ctx, "list issue comments", func() error { + var err error + comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts) + return err + }) + return comments, resp, err + }, + ConvertFn: func(comment *github.IssueComment) models.IssueComment { + return convertIssueComment(comment, owner, repo) + }, + GetDateFn: func(comment *github.IssueComment) time.Time { + return comment.GetCreatedAt().Time + }, + Since: since, + Until: until, } - if !reachedOldItems && page > 1 { - c.progress(fmt.Sprintf(" Fetched all %d pages of issue comments", page)) - } - - // Cache results - c.cache.Set(cacheKey, allComments) - - return allComments, nil + return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issue comments"), fetcher) } // UserProfile contains GitHub user profile information useful for deduplication @@ -1042,6 +839,38 @@ func convertReview(r *github.PullRequestReview, owner, repo string, prNumber int } } +func convertIssueComment(comment *github.IssueComment, owner, repo string) models.IssueComment { + // Extract issue number from the issue URL + issueNumber := 0 + if comment.IssueURL != nil { + // Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number} + parts := strings.Split(*comment.IssueURL, "/") + if len(parts) > 0 { + if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil { + issueNumber = num + } + } + } + + var author models.Author + if comment.User != nil { + author = models.Author{ + Login: comment.User.GetLogin(), + Name: comment.User.GetName(), + AvatarURL: comment.User.GetAvatarURL(), + } + } + + return models.IssueComment{ + ID: comment.GetID(), + Issue: issueNumber, + Repository: fmt.Sprintf("%s/%s", owner, repo), + Author: author, + Body: comment.GetBody(), + CreatedAt: comment.GetCreatedAt().Time, + } +} + func convertIssue(i *github.Issue, owner, repo string) models.Issue { var author models.Author if i.User != nil { diff --git a/internal/github/fetcher.go b/internal/github/fetcher.go new file mode 100644 index 0000000..0610256 --- /dev/null +++ b/internal/github/fetcher.go @@ -0,0 +1,306 @@ +package github + +import ( + "context" + "fmt" + "time" + + "github.com/google/go-github/v68/github" +) + +// DateFilterResult represents the result of date filtering +type DateFilterResult int + +const ( + // DateInclude means the item is within the date range + DateInclude DateFilterResult = iota + // DateTooNew means the item is newer than the 'until' date + DateTooNew + // DateTooOld means the item is older than the 'since' date + DateTooOld +) + +// FilterByDate checks if a time falls within the specified date range +func FilterByDate(t time.Time, since, until *time.Time) DateFilterResult { + if until != nil && t.After(*until) { + return DateTooNew + } + if since != nil && t.Before(*since) { + return DateTooOld + } + return DateInclude +} + +// PageFetcher is a generic interface for fetching paginated resources +type PageFetcher[T any, R any] interface { + // Fetch retrieves a page of items + Fetch(ctx context.Context, page int) (items []T, resp *github.Response, err error) + // Convert transforms a raw item into the result type + Convert(item T) R + // Filter determines if an item should be included based on date range + // Returns DateInclude to include, DateTooNew/DateTooOld to exclude + Filter(item T) DateFilterResult + // ShouldSkip returns true if the item should be skipped entirely (e.g., PRs in issues list) + ShouldSkip(item T) bool +} + +// FetchConfig holds configuration for paginated fetching +type FetchConfig struct { + // ResourceName is used for progress messages (e.g., "issues", "pull requests") + ResourceName string + // EarlyTermination enables stopping when all items on a page are too old + EarlyTermination bool + // EarlyTerminationThreshold is the number of consecutive old pages before stopping + EarlyTerminationThreshold int +} + +// DefaultFetchConfig returns sensible defaults +func DefaultFetchConfig(resourceName string) FetchConfig { + return FetchConfig{ + ResourceName: resourceName, + EarlyTermination: true, + EarlyTerminationThreshold: 2, + } +} + +// FetchAllPages fetches all pages of a resource with caching, filtering, and early termination +func FetchAllPages[T any, R any]( + ctx context.Context, + c *Client, + cacheKey string, + config FetchConfig, + fetcher PageFetcher[T, R], +) ([]R, error) { + // Check cache first (skip if no cache key provided) + if cacheKey != "" { + if cached, ok := c.cache.Get(cacheKey); ok { + if results, ok := cached.([]R); ok { + c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName)) + return results, nil + } + } + } + + var allResults []R + page := 1 + consecutiveOldPages := 0 + + for { + items, resp, err := fetcher.Fetch(ctx, page) + if err != nil { + return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err) + } + + c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...", + config.ResourceName, page, len(allResults), config.ResourceName)) + + oldInPage := 0 + totalEligible := 0 + + for _, item := range items { + // Skip items that should be filtered out entirely (e.g., PRs in issues API) + if fetcher.ShouldSkip(item) { + continue + } + + totalEligible++ + + // Apply date filtering + switch fetcher.Filter(item) { + case DateTooNew: + continue + case DateTooOld: + oldInPage++ + continue + case DateInclude: + allResults = append(allResults, fetcher.Convert(item)) + } + } + + // Early termination logic + if config.EarlyTermination && totalEligible > 0 && oldInPage == totalEligible { + consecutiveOldPages++ + if consecutiveOldPages >= config.EarlyTerminationThreshold { + c.progress(fmt.Sprintf(" Reached %s older than date range, stopping early (page %d)", + config.ResourceName, page)) + break + } + } else { + consecutiveOldPages = 0 + } + + if resp.NextPage == 0 { + break + } + page = resp.NextPage + } + + // Cache results (skip if no cache key provided) + if cacheKey != "" { + c.cache.Set(cacheKey, allResults) + } + + return allResults, nil +} + +// SimpleFetcher is a helper for creating simple fetchers without date filtering +type SimpleFetcher[T any, R any] struct { + FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error) + ConvertFn func(item T) R +} + +func (f *SimpleFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) { + return f.FetchFn(ctx, page) +} + +func (f *SimpleFetcher[T, R]) Convert(item T) R { + return f.ConvertFn(item) +} + +func (f *SimpleFetcher[T, R]) Filter(item T) DateFilterResult { + return DateInclude // No filtering +} + +func (f *SimpleFetcher[T, R]) ShouldSkip(item T) bool { + return false +} + +// DateFilteredFetcher extends SimpleFetcher with date filtering +type DateFilteredFetcher[T any, R any] struct { + FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error) + ConvertFn func(item T) R + GetDateFn func(item T) time.Time + SkipFn func(item T) bool + Since *time.Time + Until *time.Time +} + +func (f *DateFilteredFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) { + return f.FetchFn(ctx, page) +} + +func (f *DateFilteredFetcher[T, R]) Convert(item T) R { + return f.ConvertFn(item) +} + +func (f *DateFilteredFetcher[T, R]) Filter(item T) DateFilterResult { + return FilterByDate(f.GetDateFn(item), f.Since, f.Until) +} + +func (f *DateFilteredFetcher[T, R]) ShouldSkip(item T) bool { + if f.SkipFn != nil { + return f.SkipFn(item) + } + return false +} + +// WithRetry wraps a fetch function with retry logic +func (c *Client) WithRetry(ctx context.Context, operation string, fn func() error) error { + return c.retryWithBackoff(ctx, operation, fn) +} + +// EnrichingFetcher extends DateFilteredFetcher with per-item enrichment +// This is useful when you need to fetch additional details for each item (e.g., commit details) +type EnrichingFetcher[T any, R any] struct { + FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error) + EnrichFn func(ctx context.Context, item T) (R, error) // Enriches and converts in one step + GetDateFn func(item T) time.Time + SkipFn func(item T) bool + Since *time.Time + Until *time.Time +} + +func (f *EnrichingFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) { + return f.FetchFn(ctx, page) +} + +func (f *EnrichingFetcher[T, R]) Convert(item T) R { + // This won't be used - FetchAllPagesWithEnrichment handles enrichment + var zero R + return zero +} + +func (f *EnrichingFetcher[T, R]) Filter(item T) DateFilterResult { + return FilterByDate(f.GetDateFn(item), f.Since, f.Until) +} + +func (f *EnrichingFetcher[T, R]) ShouldSkip(item T) bool { + if f.SkipFn != nil { + return f.SkipFn(item) + } + return false +} + +// FetchAllPagesWithEnrichment is like FetchAllPages but calls EnrichFn for each item +// This is useful when you need to make additional API calls per item (e.g., fetching commit details) +func FetchAllPagesWithEnrichment[T any, R any]( + ctx context.Context, + c *Client, + cacheKey string, + config FetchConfig, + fetcher *EnrichingFetcher[T, R], + progressEvery int, // Report progress every N items (0 = disabled) +) ([]R, error) { + // Check cache first + if cacheKey != "" { + if cached, ok := c.cache.Get(cacheKey); ok { + if results, ok := cached.([]R); ok { + c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName)) + return results, nil + } + } + } + + var allResults []R + page := 1 + + for { + items, resp, err := fetcher.Fetch(ctx, page) + if err != nil { + return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err) + } + + c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...", + config.ResourceName, page, len(allResults), config.ResourceName)) + + itemsInPage := 0 + for i, item := range items { + // Skip items that should be filtered out entirely + if fetcher.ShouldSkip(item) { + continue + } + + // Apply date filtering + if fetcher.Filter(item) != DateInclude { + continue + } + + // Enrich the item (this may make additional API calls) + enriched, err := fetcher.EnrichFn(ctx, item) + if err != nil { + c.progress(fmt.Sprintf(" Warning: failed to enrich item: %v", err)) + continue + } + + allResults = append(allResults, enriched) + itemsInPage++ + + // Progress reporting + if progressEvery > 0 && (i+1)%progressEvery == 0 { + c.progress(fmt.Sprintf(" Processing item %d/%d on page %d...", i+1, len(items), page)) + } + } + + if resp.NextPage == 0 { + break + } + page = resp.NextPage + } + + // Cache results + if cacheKey != "" { + c.cache.Set(cacheKey, allResults) + } + + return allResults, nil +}