Abstract git operations into generics.

This commit is contained in:
2025-12-11 22:12:17 +00:00
parent 2f88b1a30a
commit 5115551543
2 changed files with 463 additions and 328 deletions
+157 -328
View File
@@ -323,16 +323,6 @@ func (c *Client) GetCommitCountSince(ctx context.Context, owner, repo string, si
func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) {
cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if commits, ok := cached.([]models.Commit); ok {
c.progress(" Using cached commits data")
return commits, nil
}
}
var allCommits []models.Commit
opts := &github.CommitsListOptions{
ListOptions: github.ListOptions{PerPage: 100},
}
@@ -344,23 +334,19 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un
opts.Until = *until
}
page := 1
for {
var commits []*github.RepositoryCommit
var resp *github.Response
err := c.retryWithBackoff(ctx, "list commits", func() error {
var err error
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list commits: %w", err)
}
c.progress(fmt.Sprintf(" Fetching commits page %d (%d commits so far)...", page, len(allCommits)))
for i, commit := range commits {
fetcher := &EnrichingFetcher[*github.RepositoryCommit, models.Commit]{
FetchFn: func(ctx context.Context, page int) ([]*github.RepositoryCommit, *github.Response, error) {
opts.Page = page
var commits []*github.RepositoryCommit
var resp *github.Response
err := c.retryWithBackoff(ctx, "list commits", func() error {
var err error
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
return err
})
return commits, resp, err
},
EnrichFn: func(ctx context.Context, commit *github.RepositoryCommit) (models.Commit, error) {
// Fetch detailed commit info for stats
var detailed *github.RepositoryCommit
err := c.retryWithBackoff(ctx, fmt.Sprintf("get commit %s", commit.GetSHA()[:7]), func() error {
@@ -369,31 +355,24 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un
return err
})
if err != nil {
// Log and continue - we can still use basic info
c.progress(fmt.Sprintf(" Warning: failed to get commit details for %s: %v", commit.GetSHA()[:7], err))
continue
return models.Commit{}, err
}
mc := convertCommit(detailed, owner, repo)
allCommits = append(allCommits, mc)
// Progress every 10 commits
if (i+1)%10 == 0 {
c.progress(fmt.Sprintf(" Processing commit %d/%d on page %d...", i+1, len(commits), page))
return convertCommit(detailed, owner, repo), nil
},
GetDateFn: func(commit *github.RepositoryCommit) time.Time {
if commit.Commit != nil && commit.Commit.Author != nil {
return commit.Commit.Author.GetDate().Time
}
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
return time.Time{}
},
Since: since,
Until: until,
}
// Cache results
c.cache.Set(cacheKey, allCommits)
config := DefaultFetchConfig("commits")
config.EarlyTermination = false // GitHub API already filters by since/until
return allCommits, nil
return FetchAllPagesWithEnrichment(ctx, c, cacheKey, config, fetcher, 10)
}
// mainBranches are the branches we consider as "main" branches
@@ -434,11 +413,9 @@ func (c *Client) FetchPullRequests(ctx context.Context, owner, repo string, sinc
// fetchPRsForBranch fetches merged PRs for a specific base branch
func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch string, since, until *time.Time) ([]models.PullRequest, error) {
var branchPRs []models.PullRequest
opts := &github.PullRequestListOptions{
State: "closed",
Base: baseBranch, // Filter by base branch at API level
Base: baseBranch,
Sort: "updated",
Direction: "desc",
ListOptions: github.ListOptions{
@@ -446,118 +423,74 @@ func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch
},
}
page := 1
consecutiveOldPages := 0
for {
var prs []*github.PullRequest
var resp *github.Response
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
var err error
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
return err
})
if err != nil {
return branchPRs, err
}
if page == 1 && len(prs) > 0 {
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
}
matchedInPage := 0
oldInPage := 0
for _, pr := range prs {
// Only consider merged PRs (check MergedAt since Merged field isn't in list response)
if pr.MergedAt == nil {
continue
fetcher := &DateFilteredFetcher[*github.PullRequest, models.PullRequest]{
FetchFn: func(ctx context.Context, page int) ([]*github.PullRequest, *github.Response, error) {
opts.Page = page
var prs []*github.PullRequest
var resp *github.Response
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
var err error
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
return err
})
if page == 1 && len(prs) > 0 {
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
}
// Use merge date for filtering
mergedAt := pr.MergedAt.Time
// Skip items newer than our range
if until != nil && mergedAt.After(*until) {
continue
return prs, resp, err
},
ConvertFn: func(pr *github.PullRequest) models.PullRequest {
return convertPullRequest(pr, owner, repo)
},
GetDateFn: func(pr *github.PullRequest) time.Time {
if pr.MergedAt != nil {
return pr.MergedAt.Time
}
// If older than our range, track it
if since != nil && mergedAt.Before(*since) {
oldInPage++
continue
}
mpr := convertPullRequest(pr, owner, repo)
branchPRs = append(branchPRs, mpr)
matchedInPage++
}
// Early termination: if we got a page with only old PRs (or empty), increment counter
if matchedInPage == 0 && oldInPage > 0 {
consecutiveOldPages++
// Stop after 2 consecutive pages of only old PRs
if consecutiveOldPages >= 2 {
break
}
} else {
consecutiveOldPages = 0
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
return time.Time{} // Will be filtered out by SkipFn
},
SkipFn: func(pr *github.PullRequest) bool {
// Only consider merged PRs
return pr.MergedAt == nil
},
Since: since,
Until: until,
}
return branchPRs, nil
config := FetchConfig{
ResourceName: "pull requests",
EarlyTermination: true,
EarlyTerminationThreshold: 2,
}
return FetchAllPages(ctx, c, "", config, fetcher) // Empty cache key - parent handles caching
}
// FetchReviews fetches reviews for a specific pull request
func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber int) ([]models.Review, error) {
cacheKey := fmt.Sprintf("reviews:%s/%s:%d", owner, repo, prNumber)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if reviews, ok := cached.([]models.Review); ok {
return reviews, nil
}
}
var allReviews []models.Review
opts := &github.ListOptions{PerPage: 100}
for {
var reviews []*github.PullRequestReview
var resp *github.Response
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
var err error
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list reviews: %w", err)
}
for _, review := range reviews {
mr := convertReview(review, owner, repo, prNumber)
allReviews = append(allReviews, mr)
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
fetcher := &SimpleFetcher[*github.PullRequestReview, models.Review]{
FetchFn: func(ctx context.Context, page int) ([]*github.PullRequestReview, *github.Response, error) {
opts.Page = page
var reviews []*github.PullRequestReview
var resp *github.Response
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
var err error
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
return err
})
return reviews, resp, err
},
ConvertFn: func(review *github.PullRequestReview) models.Review {
return convertReview(review, owner, repo, prNumber)
},
}
// Cache results
c.cache.Set(cacheKey, allReviews)
config := DefaultFetchConfig("reviews")
config.EarlyTermination = false // Reviews don't need date-based early termination
return allReviews, nil
return FetchAllPages(ctx, c, cacheKey, config, fetcher)
}
// FetchIssues fetches issues from a repository
@@ -565,18 +498,6 @@ func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber
func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, error) {
cacheKey := fmt.Sprintf("issues:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if issues, ok := cached.([]models.Issue); ok {
c.progress(" Using cached issues data")
return issues, nil
}
}
var allIssues []models.Issue
// Sort by created date descending - newest first
// This allows us to stop early when we hit items older than our date range
opts := &github.IssueListByRepoOptions{
State: "all",
Sort: "created",
@@ -586,77 +507,33 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt
},
}
// Note: GitHub Issues API has a 'since' parameter but it filters by update time, not created time
// So we use our own filtering with early termination for better control
page := 1
reachedOldItems := false
for {
var issues []*github.Issue
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issues", func() error {
var err error
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list issues: %w", err)
}
c.progress(fmt.Sprintf(" Fetching issues page %d (%d issues so far)...", page, len(allIssues)))
oldItemsInPage := 0
totalNonPRItems := 0
for _, issue := range issues {
fetcher := &DateFilteredFetcher[*github.Issue, models.Issue]{
FetchFn: func(ctx context.Context, page int) ([]*github.Issue, *github.Response, error) {
opts.Page = page
var issues []*github.Issue
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issues", func() error {
var err error
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
return err
})
return issues, resp, err
},
ConvertFn: func(issue *github.Issue) models.Issue {
return convertIssue(issue, owner, repo)
},
GetDateFn: func(issue *github.Issue) time.Time {
return issue.GetCreatedAt().Time
},
SkipFn: func(issue *github.Issue) bool {
// Skip pull requests (they appear in issues API)
if issue.PullRequestLinks != nil {
continue
}
totalNonPRItems++
createdAt := issue.GetCreatedAt().Time
// Skip items newer than our range (when until is specified)
if until != nil && createdAt.After(*until) {
continue
}
// If we've gone past our date range (older than since), count it
if since != nil && createdAt.Before(*since) {
oldItemsInPage++
continue
}
mi := convertIssue(issue, owner, repo)
allIssues = append(allIssues, mi)
}
// If all non-PR items in this page are older than our range, we can stop
// (since results are sorted by created date descending)
if oldItemsInPage == totalNonPRItems && totalNonPRItems > 0 {
c.progress(fmt.Sprintf(" Reached issues older than date range, stopping early (page %d)", page))
reachedOldItems = true
break
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
return issue.PullRequestLinks != nil
},
Since: since,
Until: until,
}
if !reachedOldItems && page > 1 {
c.progress(fmt.Sprintf(" Fetched all %d pages of issues", page))
}
// Cache results
c.cache.Set(cacheKey, allIssues)
return allIssues, nil
return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issues"), fetcher)
}
// FetchIssueComments fetches comments on issues from a repository
@@ -664,18 +541,6 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt
func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.IssueComment, error) {
cacheKey := fmt.Sprintf("issue_comments:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if comments, ok := cached.([]models.IssueComment); ok {
c.progress(" Using cached issue comments data")
return comments, nil
}
}
var allComments []models.IssueComment
// Sort by created date descending - newest first
// This allows us to stop early when we hit items older than our date range
opts := &github.IssueListCommentsOptions{
Sort: github.Ptr("created"),
Direction: github.Ptr("desc"),
@@ -689,97 +554,29 @@ func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, sin
opts.Since = since
}
page := 1
reachedOldItems := false
for {
var comments []*github.IssueComment
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issue comments", func() error {
var err error
// Passing empty issue number fetches all comments in the repo
comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list issue comments: %w", err)
}
c.progress(fmt.Sprintf(" Fetching issue comments page %d (%d comments so far)...", page, len(allComments)))
oldItemsInPage := 0
totalItems := len(comments)
for _, comment := range comments {
createdAt := comment.GetCreatedAt().Time
// Skip items newer than our range (when until is specified)
if until != nil && createdAt.After(*until) {
continue
}
// If we've gone past our date range (older than since), count it
if since != nil && createdAt.Before(*since) {
oldItemsInPage++
continue
}
// Extract issue number from the issue URL
issueNumber := 0
if comment.IssueURL != nil {
// Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number}
parts := strings.Split(*comment.IssueURL, "/")
if len(parts) > 0 {
if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil {
issueNumber = num
}
}
}
var author models.Author
if comment.User != nil {
author = models.Author{
Login: comment.User.GetLogin(),
Name: comment.User.GetName(),
AvatarURL: comment.User.GetAvatarURL(),
}
}
ic := models.IssueComment{
ID: comment.GetID(),
Issue: issueNumber,
Repository: fmt.Sprintf("%s/%s", owner, repo),
Author: author,
Body: comment.GetBody(),
CreatedAt: createdAt,
}
allComments = append(allComments, ic)
}
// If all items in this page are older than our range, we can stop
// (since results are sorted by created date descending)
if oldItemsInPage == totalItems && totalItems > 0 {
c.progress(fmt.Sprintf(" Reached issue comments older than date range, stopping early (page %d)", page))
reachedOldItems = true
break
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
fetcher := &DateFilteredFetcher[*github.IssueComment, models.IssueComment]{
FetchFn: func(ctx context.Context, page int) ([]*github.IssueComment, *github.Response, error) {
opts.Page = page
var comments []*github.IssueComment
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issue comments", func() error {
var err error
comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts)
return err
})
return comments, resp, err
},
ConvertFn: func(comment *github.IssueComment) models.IssueComment {
return convertIssueComment(comment, owner, repo)
},
GetDateFn: func(comment *github.IssueComment) time.Time {
return comment.GetCreatedAt().Time
},
Since: since,
Until: until,
}
if !reachedOldItems && page > 1 {
c.progress(fmt.Sprintf(" Fetched all %d pages of issue comments", page))
}
// Cache results
c.cache.Set(cacheKey, allComments)
return allComments, nil
return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issue comments"), fetcher)
}
// UserProfile contains GitHub user profile information useful for deduplication
@@ -1042,6 +839,38 @@ func convertReview(r *github.PullRequestReview, owner, repo string, prNumber int
}
}
func convertIssueComment(comment *github.IssueComment, owner, repo string) models.IssueComment {
// Extract issue number from the issue URL
issueNumber := 0
if comment.IssueURL != nil {
// Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number}
parts := strings.Split(*comment.IssueURL, "/")
if len(parts) > 0 {
if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil {
issueNumber = num
}
}
}
var author models.Author
if comment.User != nil {
author = models.Author{
Login: comment.User.GetLogin(),
Name: comment.User.GetName(),
AvatarURL: comment.User.GetAvatarURL(),
}
}
return models.IssueComment{
ID: comment.GetID(),
Issue: issueNumber,
Repository: fmt.Sprintf("%s/%s", owner, repo),
Author: author,
Body: comment.GetBody(),
CreatedAt: comment.GetCreatedAt().Time,
}
}
func convertIssue(i *github.Issue, owner, repo string) models.Issue {
var author models.Author
if i.User != nil {
+306
View File
@@ -0,0 +1,306 @@
package github
import (
"context"
"fmt"
"time"
"github.com/google/go-github/v68/github"
)
// DateFilterResult represents the result of date filtering
type DateFilterResult int
const (
// DateInclude means the item is within the date range
DateInclude DateFilterResult = iota
// DateTooNew means the item is newer than the 'until' date
DateTooNew
// DateTooOld means the item is older than the 'since' date
DateTooOld
)
// FilterByDate checks if a time falls within the specified date range
func FilterByDate(t time.Time, since, until *time.Time) DateFilterResult {
if until != nil && t.After(*until) {
return DateTooNew
}
if since != nil && t.Before(*since) {
return DateTooOld
}
return DateInclude
}
// PageFetcher is a generic interface for fetching paginated resources
type PageFetcher[T any, R any] interface {
// Fetch retrieves a page of items
Fetch(ctx context.Context, page int) (items []T, resp *github.Response, err error)
// Convert transforms a raw item into the result type
Convert(item T) R
// Filter determines if an item should be included based on date range
// Returns DateInclude to include, DateTooNew/DateTooOld to exclude
Filter(item T) DateFilterResult
// ShouldSkip returns true if the item should be skipped entirely (e.g., PRs in issues list)
ShouldSkip(item T) bool
}
// FetchConfig holds configuration for paginated fetching
type FetchConfig struct {
// ResourceName is used for progress messages (e.g., "issues", "pull requests")
ResourceName string
// EarlyTermination enables stopping when all items on a page are too old
EarlyTermination bool
// EarlyTerminationThreshold is the number of consecutive old pages before stopping
EarlyTerminationThreshold int
}
// DefaultFetchConfig returns sensible defaults
func DefaultFetchConfig(resourceName string) FetchConfig {
return FetchConfig{
ResourceName: resourceName,
EarlyTermination: true,
EarlyTerminationThreshold: 2,
}
}
// FetchAllPages fetches all pages of a resource with caching, filtering, and early termination
func FetchAllPages[T any, R any](
ctx context.Context,
c *Client,
cacheKey string,
config FetchConfig,
fetcher PageFetcher[T, R],
) ([]R, error) {
// Check cache first (skip if no cache key provided)
if cacheKey != "" {
if cached, ok := c.cache.Get(cacheKey); ok {
if results, ok := cached.([]R); ok {
c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName))
return results, nil
}
}
}
var allResults []R
page := 1
consecutiveOldPages := 0
for {
items, resp, err := fetcher.Fetch(ctx, page)
if err != nil {
return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
}
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
config.ResourceName, page, len(allResults), config.ResourceName))
oldInPage := 0
totalEligible := 0
for _, item := range items {
// Skip items that should be filtered out entirely (e.g., PRs in issues API)
if fetcher.ShouldSkip(item) {
continue
}
totalEligible++
// Apply date filtering
switch fetcher.Filter(item) {
case DateTooNew:
continue
case DateTooOld:
oldInPage++
continue
case DateInclude:
allResults = append(allResults, fetcher.Convert(item))
}
}
// Early termination logic
if config.EarlyTermination && totalEligible > 0 && oldInPage == totalEligible {
consecutiveOldPages++
if consecutiveOldPages >= config.EarlyTerminationThreshold {
c.progress(fmt.Sprintf(" Reached %s older than date range, stopping early (page %d)",
config.ResourceName, page))
break
}
} else {
consecutiveOldPages = 0
}
if resp.NextPage == 0 {
break
}
page = resp.NextPage
}
// Cache results (skip if no cache key provided)
if cacheKey != "" {
c.cache.Set(cacheKey, allResults)
}
return allResults, nil
}
// SimpleFetcher is a helper for creating simple fetchers without date filtering
type SimpleFetcher[T any, R any] struct {
FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
ConvertFn func(item T) R
}
func (f *SimpleFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
return f.FetchFn(ctx, page)
}
func (f *SimpleFetcher[T, R]) Convert(item T) R {
return f.ConvertFn(item)
}
func (f *SimpleFetcher[T, R]) Filter(item T) DateFilterResult {
return DateInclude // No filtering
}
func (f *SimpleFetcher[T, R]) ShouldSkip(item T) bool {
return false
}
// DateFilteredFetcher extends SimpleFetcher with date filtering
type DateFilteredFetcher[T any, R any] struct {
FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
ConvertFn func(item T) R
GetDateFn func(item T) time.Time
SkipFn func(item T) bool
Since *time.Time
Until *time.Time
}
func (f *DateFilteredFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
return f.FetchFn(ctx, page)
}
func (f *DateFilteredFetcher[T, R]) Convert(item T) R {
return f.ConvertFn(item)
}
func (f *DateFilteredFetcher[T, R]) Filter(item T) DateFilterResult {
return FilterByDate(f.GetDateFn(item), f.Since, f.Until)
}
func (f *DateFilteredFetcher[T, R]) ShouldSkip(item T) bool {
if f.SkipFn != nil {
return f.SkipFn(item)
}
return false
}
// WithRetry wraps a fetch function with retry logic
func (c *Client) WithRetry(ctx context.Context, operation string, fn func() error) error {
return c.retryWithBackoff(ctx, operation, fn)
}
// EnrichingFetcher extends DateFilteredFetcher with per-item enrichment
// This is useful when you need to fetch additional details for each item (e.g., commit details)
type EnrichingFetcher[T any, R any] struct {
FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
EnrichFn func(ctx context.Context, item T) (R, error) // Enriches and converts in one step
GetDateFn func(item T) time.Time
SkipFn func(item T) bool
Since *time.Time
Until *time.Time
}
func (f *EnrichingFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
return f.FetchFn(ctx, page)
}
func (f *EnrichingFetcher[T, R]) Convert(item T) R {
// This won't be used - FetchAllPagesWithEnrichment handles enrichment
var zero R
return zero
}
func (f *EnrichingFetcher[T, R]) Filter(item T) DateFilterResult {
return FilterByDate(f.GetDateFn(item), f.Since, f.Until)
}
func (f *EnrichingFetcher[T, R]) ShouldSkip(item T) bool {
if f.SkipFn != nil {
return f.SkipFn(item)
}
return false
}
// FetchAllPagesWithEnrichment is like FetchAllPages but calls EnrichFn for each item
// This is useful when you need to make additional API calls per item (e.g., fetching commit details)
func FetchAllPagesWithEnrichment[T any, R any](
ctx context.Context,
c *Client,
cacheKey string,
config FetchConfig,
fetcher *EnrichingFetcher[T, R],
progressEvery int, // Report progress every N items (0 = disabled)
) ([]R, error) {
// Check cache first
if cacheKey != "" {
if cached, ok := c.cache.Get(cacheKey); ok {
if results, ok := cached.([]R); ok {
c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName))
return results, nil
}
}
}
var allResults []R
page := 1
for {
items, resp, err := fetcher.Fetch(ctx, page)
if err != nil {
return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
}
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
config.ResourceName, page, len(allResults), config.ResourceName))
itemsInPage := 0
for i, item := range items {
// Skip items that should be filtered out entirely
if fetcher.ShouldSkip(item) {
continue
}
// Apply date filtering
if fetcher.Filter(item) != DateInclude {
continue
}
// Enrich the item (this may make additional API calls)
enriched, err := fetcher.EnrichFn(ctx, item)
if err != nil {
c.progress(fmt.Sprintf(" Warning: failed to enrich item: %v", err))
continue
}
allResults = append(allResults, enriched)
itemsInPage++
// Progress reporting
if progressEvery > 0 && (i+1)%progressEvery == 0 {
c.progress(fmt.Sprintf(" Processing item %d/%d on page %d...", i+1, len(items), page))
}
}
if resp.NextPage == 0 {
break
}
page = resp.NextPage
}
// Cache results
if cacheKey != "" {
c.cache.Set(cacheKey, allResults)
}
return allResults, nil
}