mirror of
https://github.com/lukaszraczylo/git-velocity.git
synced 2026-06-05 22:43:56 +00:00
Abstract git operations into generics.
This commit is contained in:
+157
-328
@@ -323,16 +323,6 @@ func (c *Client) GetCommitCountSince(ctx context.Context, owner, repo string, si
|
||||
func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) {
|
||||
cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until)
|
||||
|
||||
// Check cache
|
||||
if cached, ok := c.cache.Get(cacheKey); ok {
|
||||
if commits, ok := cached.([]models.Commit); ok {
|
||||
c.progress(" Using cached commits data")
|
||||
return commits, nil
|
||||
}
|
||||
}
|
||||
|
||||
var allCommits []models.Commit
|
||||
|
||||
opts := &github.CommitsListOptions{
|
||||
ListOptions: github.ListOptions{PerPage: 100},
|
||||
}
|
||||
@@ -344,23 +334,19 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un
|
||||
opts.Until = *until
|
||||
}
|
||||
|
||||
page := 1
|
||||
for {
|
||||
var commits []*github.RepositoryCommit
|
||||
var resp *github.Response
|
||||
|
||||
err := c.retryWithBackoff(ctx, "list commits", func() error {
|
||||
var err error
|
||||
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list commits: %w", err)
|
||||
}
|
||||
|
||||
c.progress(fmt.Sprintf(" Fetching commits page %d (%d commits so far)...", page, len(allCommits)))
|
||||
|
||||
for i, commit := range commits {
|
||||
fetcher := &EnrichingFetcher[*github.RepositoryCommit, models.Commit]{
|
||||
FetchFn: func(ctx context.Context, page int) ([]*github.RepositoryCommit, *github.Response, error) {
|
||||
opts.Page = page
|
||||
var commits []*github.RepositoryCommit
|
||||
var resp *github.Response
|
||||
err := c.retryWithBackoff(ctx, "list commits", func() error {
|
||||
var err error
|
||||
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
|
||||
return err
|
||||
})
|
||||
return commits, resp, err
|
||||
},
|
||||
EnrichFn: func(ctx context.Context, commit *github.RepositoryCommit) (models.Commit, error) {
|
||||
// Fetch detailed commit info for stats
|
||||
var detailed *github.RepositoryCommit
|
||||
err := c.retryWithBackoff(ctx, fmt.Sprintf("get commit %s", commit.GetSHA()[:7]), func() error {
|
||||
@@ -369,31 +355,24 @@ func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, un
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
// Log and continue - we can still use basic info
|
||||
c.progress(fmt.Sprintf(" Warning: failed to get commit details for %s: %v", commit.GetSHA()[:7], err))
|
||||
continue
|
||||
return models.Commit{}, err
|
||||
}
|
||||
|
||||
mc := convertCommit(detailed, owner, repo)
|
||||
allCommits = append(allCommits, mc)
|
||||
|
||||
// Progress every 10 commits
|
||||
if (i+1)%10 == 0 {
|
||||
c.progress(fmt.Sprintf(" Processing commit %d/%d on page %d...", i+1, len(commits), page))
|
||||
return convertCommit(detailed, owner, repo), nil
|
||||
},
|
||||
GetDateFn: func(commit *github.RepositoryCommit) time.Time {
|
||||
if commit.Commit != nil && commit.Commit.Author != nil {
|
||||
return commit.Commit.Author.GetDate().Time
|
||||
}
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
page++
|
||||
return time.Time{}
|
||||
},
|
||||
Since: since,
|
||||
Until: until,
|
||||
}
|
||||
|
||||
// Cache results
|
||||
c.cache.Set(cacheKey, allCommits)
|
||||
config := DefaultFetchConfig("commits")
|
||||
config.EarlyTermination = false // GitHub API already filters by since/until
|
||||
|
||||
return allCommits, nil
|
||||
return FetchAllPagesWithEnrichment(ctx, c, cacheKey, config, fetcher, 10)
|
||||
}
|
||||
|
||||
// mainBranches are the branches we consider as "main" branches
|
||||
@@ -434,11 +413,9 @@ func (c *Client) FetchPullRequests(ctx context.Context, owner, repo string, sinc
|
||||
|
||||
// fetchPRsForBranch fetches merged PRs for a specific base branch
|
||||
func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch string, since, until *time.Time) ([]models.PullRequest, error) {
|
||||
var branchPRs []models.PullRequest
|
||||
|
||||
opts := &github.PullRequestListOptions{
|
||||
State: "closed",
|
||||
Base: baseBranch, // Filter by base branch at API level
|
||||
Base: baseBranch,
|
||||
Sort: "updated",
|
||||
Direction: "desc",
|
||||
ListOptions: github.ListOptions{
|
||||
@@ -446,118 +423,74 @@ func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch
|
||||
},
|
||||
}
|
||||
|
||||
page := 1
|
||||
consecutiveOldPages := 0
|
||||
|
||||
for {
|
||||
var prs []*github.PullRequest
|
||||
var resp *github.Response
|
||||
|
||||
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
|
||||
var err error
|
||||
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return branchPRs, err
|
||||
}
|
||||
|
||||
if page == 1 && len(prs) > 0 {
|
||||
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
|
||||
}
|
||||
|
||||
matchedInPage := 0
|
||||
oldInPage := 0
|
||||
|
||||
for _, pr := range prs {
|
||||
// Only consider merged PRs (check MergedAt since Merged field isn't in list response)
|
||||
if pr.MergedAt == nil {
|
||||
continue
|
||||
fetcher := &DateFilteredFetcher[*github.PullRequest, models.PullRequest]{
|
||||
FetchFn: func(ctx context.Context, page int) ([]*github.PullRequest, *github.Response, error) {
|
||||
opts.Page = page
|
||||
var prs []*github.PullRequest
|
||||
var resp *github.Response
|
||||
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
|
||||
var err error
|
||||
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
|
||||
return err
|
||||
})
|
||||
if page == 1 && len(prs) > 0 {
|
||||
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
|
||||
}
|
||||
|
||||
// Use merge date for filtering
|
||||
mergedAt := pr.MergedAt.Time
|
||||
|
||||
// Skip items newer than our range
|
||||
if until != nil && mergedAt.After(*until) {
|
||||
continue
|
||||
return prs, resp, err
|
||||
},
|
||||
ConvertFn: func(pr *github.PullRequest) models.PullRequest {
|
||||
return convertPullRequest(pr, owner, repo)
|
||||
},
|
||||
GetDateFn: func(pr *github.PullRequest) time.Time {
|
||||
if pr.MergedAt != nil {
|
||||
return pr.MergedAt.Time
|
||||
}
|
||||
|
||||
// If older than our range, track it
|
||||
if since != nil && mergedAt.Before(*since) {
|
||||
oldInPage++
|
||||
continue
|
||||
}
|
||||
|
||||
mpr := convertPullRequest(pr, owner, repo)
|
||||
branchPRs = append(branchPRs, mpr)
|
||||
matchedInPage++
|
||||
}
|
||||
|
||||
// Early termination: if we got a page with only old PRs (or empty), increment counter
|
||||
if matchedInPage == 0 && oldInPage > 0 {
|
||||
consecutiveOldPages++
|
||||
// Stop after 2 consecutive pages of only old PRs
|
||||
if consecutiveOldPages >= 2 {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
consecutiveOldPages = 0
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
page++
|
||||
return time.Time{} // Will be filtered out by SkipFn
|
||||
},
|
||||
SkipFn: func(pr *github.PullRequest) bool {
|
||||
// Only consider merged PRs
|
||||
return pr.MergedAt == nil
|
||||
},
|
||||
Since: since,
|
||||
Until: until,
|
||||
}
|
||||
|
||||
return branchPRs, nil
|
||||
config := FetchConfig{
|
||||
ResourceName: "pull requests",
|
||||
EarlyTermination: true,
|
||||
EarlyTerminationThreshold: 2,
|
||||
}
|
||||
|
||||
return FetchAllPages(ctx, c, "", config, fetcher) // Empty cache key - parent handles caching
|
||||
}
|
||||
|
||||
// FetchReviews fetches reviews for a specific pull request
|
||||
func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber int) ([]models.Review, error) {
|
||||
cacheKey := fmt.Sprintf("reviews:%s/%s:%d", owner, repo, prNumber)
|
||||
|
||||
// Check cache
|
||||
if cached, ok := c.cache.Get(cacheKey); ok {
|
||||
if reviews, ok := cached.([]models.Review); ok {
|
||||
return reviews, nil
|
||||
}
|
||||
}
|
||||
|
||||
var allReviews []models.Review
|
||||
|
||||
opts := &github.ListOptions{PerPage: 100}
|
||||
|
||||
for {
|
||||
var reviews []*github.PullRequestReview
|
||||
var resp *github.Response
|
||||
|
||||
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
|
||||
var err error
|
||||
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list reviews: %w", err)
|
||||
}
|
||||
|
||||
for _, review := range reviews {
|
||||
mr := convertReview(review, owner, repo, prNumber)
|
||||
allReviews = append(allReviews, mr)
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
fetcher := &SimpleFetcher[*github.PullRequestReview, models.Review]{
|
||||
FetchFn: func(ctx context.Context, page int) ([]*github.PullRequestReview, *github.Response, error) {
|
||||
opts.Page = page
|
||||
var reviews []*github.PullRequestReview
|
||||
var resp *github.Response
|
||||
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
|
||||
var err error
|
||||
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
|
||||
return err
|
||||
})
|
||||
return reviews, resp, err
|
||||
},
|
||||
ConvertFn: func(review *github.PullRequestReview) models.Review {
|
||||
return convertReview(review, owner, repo, prNumber)
|
||||
},
|
||||
}
|
||||
|
||||
// Cache results
|
||||
c.cache.Set(cacheKey, allReviews)
|
||||
config := DefaultFetchConfig("reviews")
|
||||
config.EarlyTermination = false // Reviews don't need date-based early termination
|
||||
|
||||
return allReviews, nil
|
||||
return FetchAllPages(ctx, c, cacheKey, config, fetcher)
|
||||
}
|
||||
|
||||
// FetchIssues fetches issues from a repository
|
||||
@@ -565,18 +498,6 @@ func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber
|
||||
func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, error) {
|
||||
cacheKey := fmt.Sprintf("issues:%s/%s:%v:%v", owner, repo, since, until)
|
||||
|
||||
// Check cache
|
||||
if cached, ok := c.cache.Get(cacheKey); ok {
|
||||
if issues, ok := cached.([]models.Issue); ok {
|
||||
c.progress(" Using cached issues data")
|
||||
return issues, nil
|
||||
}
|
||||
}
|
||||
|
||||
var allIssues []models.Issue
|
||||
|
||||
// Sort by created date descending - newest first
|
||||
// This allows us to stop early when we hit items older than our date range
|
||||
opts := &github.IssueListByRepoOptions{
|
||||
State: "all",
|
||||
Sort: "created",
|
||||
@@ -586,77 +507,33 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt
|
||||
},
|
||||
}
|
||||
|
||||
// Note: GitHub Issues API has a 'since' parameter but it filters by update time, not created time
|
||||
// So we use our own filtering with early termination for better control
|
||||
|
||||
page := 1
|
||||
reachedOldItems := false
|
||||
|
||||
for {
|
||||
var issues []*github.Issue
|
||||
var resp *github.Response
|
||||
|
||||
err := c.retryWithBackoff(ctx, "list issues", func() error {
|
||||
var err error
|
||||
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list issues: %w", err)
|
||||
}
|
||||
|
||||
c.progress(fmt.Sprintf(" Fetching issues page %d (%d issues so far)...", page, len(allIssues)))
|
||||
|
||||
oldItemsInPage := 0
|
||||
totalNonPRItems := 0
|
||||
|
||||
for _, issue := range issues {
|
||||
fetcher := &DateFilteredFetcher[*github.Issue, models.Issue]{
|
||||
FetchFn: func(ctx context.Context, page int) ([]*github.Issue, *github.Response, error) {
|
||||
opts.Page = page
|
||||
var issues []*github.Issue
|
||||
var resp *github.Response
|
||||
err := c.retryWithBackoff(ctx, "list issues", func() error {
|
||||
var err error
|
||||
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
|
||||
return err
|
||||
})
|
||||
return issues, resp, err
|
||||
},
|
||||
ConvertFn: func(issue *github.Issue) models.Issue {
|
||||
return convertIssue(issue, owner, repo)
|
||||
},
|
||||
GetDateFn: func(issue *github.Issue) time.Time {
|
||||
return issue.GetCreatedAt().Time
|
||||
},
|
||||
SkipFn: func(issue *github.Issue) bool {
|
||||
// Skip pull requests (they appear in issues API)
|
||||
if issue.PullRequestLinks != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
totalNonPRItems++
|
||||
createdAt := issue.GetCreatedAt().Time
|
||||
|
||||
// Skip items newer than our range (when until is specified)
|
||||
if until != nil && createdAt.After(*until) {
|
||||
continue
|
||||
}
|
||||
|
||||
// If we've gone past our date range (older than since), count it
|
||||
if since != nil && createdAt.Before(*since) {
|
||||
oldItemsInPage++
|
||||
continue
|
||||
}
|
||||
|
||||
mi := convertIssue(issue, owner, repo)
|
||||
allIssues = append(allIssues, mi)
|
||||
}
|
||||
|
||||
// If all non-PR items in this page are older than our range, we can stop
|
||||
// (since results are sorted by created date descending)
|
||||
if oldItemsInPage == totalNonPRItems && totalNonPRItems > 0 {
|
||||
c.progress(fmt.Sprintf(" Reached issues older than date range, stopping early (page %d)", page))
|
||||
reachedOldItems = true
|
||||
break
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
page++
|
||||
return issue.PullRequestLinks != nil
|
||||
},
|
||||
Since: since,
|
||||
Until: until,
|
||||
}
|
||||
|
||||
if !reachedOldItems && page > 1 {
|
||||
c.progress(fmt.Sprintf(" Fetched all %d pages of issues", page))
|
||||
}
|
||||
|
||||
// Cache results
|
||||
c.cache.Set(cacheKey, allIssues)
|
||||
|
||||
return allIssues, nil
|
||||
return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issues"), fetcher)
|
||||
}
|
||||
|
||||
// FetchIssueComments fetches comments on issues from a repository
|
||||
@@ -664,18 +541,6 @@ func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, unt
|
||||
func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.IssueComment, error) {
|
||||
cacheKey := fmt.Sprintf("issue_comments:%s/%s:%v:%v", owner, repo, since, until)
|
||||
|
||||
// Check cache
|
||||
if cached, ok := c.cache.Get(cacheKey); ok {
|
||||
if comments, ok := cached.([]models.IssueComment); ok {
|
||||
c.progress(" Using cached issue comments data")
|
||||
return comments, nil
|
||||
}
|
||||
}
|
||||
|
||||
var allComments []models.IssueComment
|
||||
|
||||
// Sort by created date descending - newest first
|
||||
// This allows us to stop early when we hit items older than our date range
|
||||
opts := &github.IssueListCommentsOptions{
|
||||
Sort: github.Ptr("created"),
|
||||
Direction: github.Ptr("desc"),
|
||||
@@ -689,97 +554,29 @@ func (c *Client) FetchIssueComments(ctx context.Context, owner, repo string, sin
|
||||
opts.Since = since
|
||||
}
|
||||
|
||||
page := 1
|
||||
reachedOldItems := false
|
||||
|
||||
for {
|
||||
var comments []*github.IssueComment
|
||||
var resp *github.Response
|
||||
|
||||
err := c.retryWithBackoff(ctx, "list issue comments", func() error {
|
||||
var err error
|
||||
// Passing empty issue number fetches all comments in the repo
|
||||
comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list issue comments: %w", err)
|
||||
}
|
||||
|
||||
c.progress(fmt.Sprintf(" Fetching issue comments page %d (%d comments so far)...", page, len(allComments)))
|
||||
|
||||
oldItemsInPage := 0
|
||||
totalItems := len(comments)
|
||||
|
||||
for _, comment := range comments {
|
||||
createdAt := comment.GetCreatedAt().Time
|
||||
|
||||
// Skip items newer than our range (when until is specified)
|
||||
if until != nil && createdAt.After(*until) {
|
||||
continue
|
||||
}
|
||||
|
||||
// If we've gone past our date range (older than since), count it
|
||||
if since != nil && createdAt.Before(*since) {
|
||||
oldItemsInPage++
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract issue number from the issue URL
|
||||
issueNumber := 0
|
||||
if comment.IssueURL != nil {
|
||||
// Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number}
|
||||
parts := strings.Split(*comment.IssueURL, "/")
|
||||
if len(parts) > 0 {
|
||||
if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil {
|
||||
issueNumber = num
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var author models.Author
|
||||
if comment.User != nil {
|
||||
author = models.Author{
|
||||
Login: comment.User.GetLogin(),
|
||||
Name: comment.User.GetName(),
|
||||
AvatarURL: comment.User.GetAvatarURL(),
|
||||
}
|
||||
}
|
||||
|
||||
ic := models.IssueComment{
|
||||
ID: comment.GetID(),
|
||||
Issue: issueNumber,
|
||||
Repository: fmt.Sprintf("%s/%s", owner, repo),
|
||||
Author: author,
|
||||
Body: comment.GetBody(),
|
||||
CreatedAt: createdAt,
|
||||
}
|
||||
allComments = append(allComments, ic)
|
||||
}
|
||||
|
||||
// If all items in this page are older than our range, we can stop
|
||||
// (since results are sorted by created date descending)
|
||||
if oldItemsInPage == totalItems && totalItems > 0 {
|
||||
c.progress(fmt.Sprintf(" Reached issue comments older than date range, stopping early (page %d)", page))
|
||||
reachedOldItems = true
|
||||
break
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
page++
|
||||
fetcher := &DateFilteredFetcher[*github.IssueComment, models.IssueComment]{
|
||||
FetchFn: func(ctx context.Context, page int) ([]*github.IssueComment, *github.Response, error) {
|
||||
opts.Page = page
|
||||
var comments []*github.IssueComment
|
||||
var resp *github.Response
|
||||
err := c.retryWithBackoff(ctx, "list issue comments", func() error {
|
||||
var err error
|
||||
comments, resp, err = c.gh.Issues.ListComments(ctx, owner, repo, 0, opts)
|
||||
return err
|
||||
})
|
||||
return comments, resp, err
|
||||
},
|
||||
ConvertFn: func(comment *github.IssueComment) models.IssueComment {
|
||||
return convertIssueComment(comment, owner, repo)
|
||||
},
|
||||
GetDateFn: func(comment *github.IssueComment) time.Time {
|
||||
return comment.GetCreatedAt().Time
|
||||
},
|
||||
Since: since,
|
||||
Until: until,
|
||||
}
|
||||
|
||||
if !reachedOldItems && page > 1 {
|
||||
c.progress(fmt.Sprintf(" Fetched all %d pages of issue comments", page))
|
||||
}
|
||||
|
||||
// Cache results
|
||||
c.cache.Set(cacheKey, allComments)
|
||||
|
||||
return allComments, nil
|
||||
return FetchAllPages(ctx, c, cacheKey, DefaultFetchConfig("issue comments"), fetcher)
|
||||
}
|
||||
|
||||
// UserProfile contains GitHub user profile information useful for deduplication
|
||||
@@ -1042,6 +839,38 @@ func convertReview(r *github.PullRequestReview, owner, repo string, prNumber int
|
||||
}
|
||||
}
|
||||
|
||||
func convertIssueComment(comment *github.IssueComment, owner, repo string) models.IssueComment {
|
||||
// Extract issue number from the issue URL
|
||||
issueNumber := 0
|
||||
if comment.IssueURL != nil {
|
||||
// Issue URL format: https://api.github.com/repos/{owner}/{repo}/issues/{number}
|
||||
parts := strings.Split(*comment.IssueURL, "/")
|
||||
if len(parts) > 0 {
|
||||
if num, err := strconv.Atoi(parts[len(parts)-1]); err == nil {
|
||||
issueNumber = num
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var author models.Author
|
||||
if comment.User != nil {
|
||||
author = models.Author{
|
||||
Login: comment.User.GetLogin(),
|
||||
Name: comment.User.GetName(),
|
||||
AvatarURL: comment.User.GetAvatarURL(),
|
||||
}
|
||||
}
|
||||
|
||||
return models.IssueComment{
|
||||
ID: comment.GetID(),
|
||||
Issue: issueNumber,
|
||||
Repository: fmt.Sprintf("%s/%s", owner, repo),
|
||||
Author: author,
|
||||
Body: comment.GetBody(),
|
||||
CreatedAt: comment.GetCreatedAt().Time,
|
||||
}
|
||||
}
|
||||
|
||||
func convertIssue(i *github.Issue, owner, repo string) models.Issue {
|
||||
var author models.Author
|
||||
if i.User != nil {
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-github/v68/github"
|
||||
)
|
||||
|
||||
// DateFilterResult represents the result of date filtering
|
||||
type DateFilterResult int
|
||||
|
||||
const (
|
||||
// DateInclude means the item is within the date range
|
||||
DateInclude DateFilterResult = iota
|
||||
// DateTooNew means the item is newer than the 'until' date
|
||||
DateTooNew
|
||||
// DateTooOld means the item is older than the 'since' date
|
||||
DateTooOld
|
||||
)
|
||||
|
||||
// FilterByDate checks if a time falls within the specified date range
|
||||
func FilterByDate(t time.Time, since, until *time.Time) DateFilterResult {
|
||||
if until != nil && t.After(*until) {
|
||||
return DateTooNew
|
||||
}
|
||||
if since != nil && t.Before(*since) {
|
||||
return DateTooOld
|
||||
}
|
||||
return DateInclude
|
||||
}
|
||||
|
||||
// PageFetcher is a generic interface for fetching paginated resources
|
||||
type PageFetcher[T any, R any] interface {
|
||||
// Fetch retrieves a page of items
|
||||
Fetch(ctx context.Context, page int) (items []T, resp *github.Response, err error)
|
||||
// Convert transforms a raw item into the result type
|
||||
Convert(item T) R
|
||||
// Filter determines if an item should be included based on date range
|
||||
// Returns DateInclude to include, DateTooNew/DateTooOld to exclude
|
||||
Filter(item T) DateFilterResult
|
||||
// ShouldSkip returns true if the item should be skipped entirely (e.g., PRs in issues list)
|
||||
ShouldSkip(item T) bool
|
||||
}
|
||||
|
||||
// FetchConfig holds configuration for paginated fetching
|
||||
type FetchConfig struct {
|
||||
// ResourceName is used for progress messages (e.g., "issues", "pull requests")
|
||||
ResourceName string
|
||||
// EarlyTermination enables stopping when all items on a page are too old
|
||||
EarlyTermination bool
|
||||
// EarlyTerminationThreshold is the number of consecutive old pages before stopping
|
||||
EarlyTerminationThreshold int
|
||||
}
|
||||
|
||||
// DefaultFetchConfig returns sensible defaults
|
||||
func DefaultFetchConfig(resourceName string) FetchConfig {
|
||||
return FetchConfig{
|
||||
ResourceName: resourceName,
|
||||
EarlyTermination: true,
|
||||
EarlyTerminationThreshold: 2,
|
||||
}
|
||||
}
|
||||
|
||||
// FetchAllPages fetches all pages of a resource with caching, filtering, and early termination
|
||||
func FetchAllPages[T any, R any](
|
||||
ctx context.Context,
|
||||
c *Client,
|
||||
cacheKey string,
|
||||
config FetchConfig,
|
||||
fetcher PageFetcher[T, R],
|
||||
) ([]R, error) {
|
||||
// Check cache first (skip if no cache key provided)
|
||||
if cacheKey != "" {
|
||||
if cached, ok := c.cache.Get(cacheKey); ok {
|
||||
if results, ok := cached.([]R); ok {
|
||||
c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName))
|
||||
return results, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var allResults []R
|
||||
page := 1
|
||||
consecutiveOldPages := 0
|
||||
|
||||
for {
|
||||
items, resp, err := fetcher.Fetch(ctx, page)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
|
||||
}
|
||||
|
||||
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
|
||||
config.ResourceName, page, len(allResults), config.ResourceName))
|
||||
|
||||
oldInPage := 0
|
||||
totalEligible := 0
|
||||
|
||||
for _, item := range items {
|
||||
// Skip items that should be filtered out entirely (e.g., PRs in issues API)
|
||||
if fetcher.ShouldSkip(item) {
|
||||
continue
|
||||
}
|
||||
|
||||
totalEligible++
|
||||
|
||||
// Apply date filtering
|
||||
switch fetcher.Filter(item) {
|
||||
case DateTooNew:
|
||||
continue
|
||||
case DateTooOld:
|
||||
oldInPage++
|
||||
continue
|
||||
case DateInclude:
|
||||
allResults = append(allResults, fetcher.Convert(item))
|
||||
}
|
||||
}
|
||||
|
||||
// Early termination logic
|
||||
if config.EarlyTermination && totalEligible > 0 && oldInPage == totalEligible {
|
||||
consecutiveOldPages++
|
||||
if consecutiveOldPages >= config.EarlyTerminationThreshold {
|
||||
c.progress(fmt.Sprintf(" Reached %s older than date range, stopping early (page %d)",
|
||||
config.ResourceName, page))
|
||||
break
|
||||
}
|
||||
} else {
|
||||
consecutiveOldPages = 0
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
page = resp.NextPage
|
||||
}
|
||||
|
||||
// Cache results (skip if no cache key provided)
|
||||
if cacheKey != "" {
|
||||
c.cache.Set(cacheKey, allResults)
|
||||
}
|
||||
|
||||
return allResults, nil
|
||||
}
|
||||
|
||||
// SimpleFetcher is a helper for creating simple fetchers without date filtering
|
||||
type SimpleFetcher[T any, R any] struct {
|
||||
FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
|
||||
ConvertFn func(item T) R
|
||||
}
|
||||
|
||||
func (f *SimpleFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
|
||||
return f.FetchFn(ctx, page)
|
||||
}
|
||||
|
||||
func (f *SimpleFetcher[T, R]) Convert(item T) R {
|
||||
return f.ConvertFn(item)
|
||||
}
|
||||
|
||||
func (f *SimpleFetcher[T, R]) Filter(item T) DateFilterResult {
|
||||
return DateInclude // No filtering
|
||||
}
|
||||
|
||||
func (f *SimpleFetcher[T, R]) ShouldSkip(item T) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// DateFilteredFetcher extends SimpleFetcher with date filtering
|
||||
type DateFilteredFetcher[T any, R any] struct {
|
||||
FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
|
||||
ConvertFn func(item T) R
|
||||
GetDateFn func(item T) time.Time
|
||||
SkipFn func(item T) bool
|
||||
Since *time.Time
|
||||
Until *time.Time
|
||||
}
|
||||
|
||||
func (f *DateFilteredFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
|
||||
return f.FetchFn(ctx, page)
|
||||
}
|
||||
|
||||
func (f *DateFilteredFetcher[T, R]) Convert(item T) R {
|
||||
return f.ConvertFn(item)
|
||||
}
|
||||
|
||||
func (f *DateFilteredFetcher[T, R]) Filter(item T) DateFilterResult {
|
||||
return FilterByDate(f.GetDateFn(item), f.Since, f.Until)
|
||||
}
|
||||
|
||||
func (f *DateFilteredFetcher[T, R]) ShouldSkip(item T) bool {
|
||||
if f.SkipFn != nil {
|
||||
return f.SkipFn(item)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// WithRetry wraps a fetch function with retry logic
|
||||
func (c *Client) WithRetry(ctx context.Context, operation string, fn func() error) error {
|
||||
return c.retryWithBackoff(ctx, operation, fn)
|
||||
}
|
||||
|
||||
// EnrichingFetcher extends DateFilteredFetcher with per-item enrichment
|
||||
// This is useful when you need to fetch additional details for each item (e.g., commit details)
|
||||
type EnrichingFetcher[T any, R any] struct {
|
||||
FetchFn func(ctx context.Context, page int) ([]T, *github.Response, error)
|
||||
EnrichFn func(ctx context.Context, item T) (R, error) // Enriches and converts in one step
|
||||
GetDateFn func(item T) time.Time
|
||||
SkipFn func(item T) bool
|
||||
Since *time.Time
|
||||
Until *time.Time
|
||||
}
|
||||
|
||||
func (f *EnrichingFetcher[T, R]) Fetch(ctx context.Context, page int) ([]T, *github.Response, error) {
|
||||
return f.FetchFn(ctx, page)
|
||||
}
|
||||
|
||||
func (f *EnrichingFetcher[T, R]) Convert(item T) R {
|
||||
// This won't be used - FetchAllPagesWithEnrichment handles enrichment
|
||||
var zero R
|
||||
return zero
|
||||
}
|
||||
|
||||
func (f *EnrichingFetcher[T, R]) Filter(item T) DateFilterResult {
|
||||
return FilterByDate(f.GetDateFn(item), f.Since, f.Until)
|
||||
}
|
||||
|
||||
func (f *EnrichingFetcher[T, R]) ShouldSkip(item T) bool {
|
||||
if f.SkipFn != nil {
|
||||
return f.SkipFn(item)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// FetchAllPagesWithEnrichment is like FetchAllPages but calls EnrichFn for each item
|
||||
// This is useful when you need to make additional API calls per item (e.g., fetching commit details)
|
||||
func FetchAllPagesWithEnrichment[T any, R any](
|
||||
ctx context.Context,
|
||||
c *Client,
|
||||
cacheKey string,
|
||||
config FetchConfig,
|
||||
fetcher *EnrichingFetcher[T, R],
|
||||
progressEvery int, // Report progress every N items (0 = disabled)
|
||||
) ([]R, error) {
|
||||
// Check cache first
|
||||
if cacheKey != "" {
|
||||
if cached, ok := c.cache.Get(cacheKey); ok {
|
||||
if results, ok := cached.([]R); ok {
|
||||
c.progress(fmt.Sprintf(" Using cached %s data", config.ResourceName))
|
||||
return results, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var allResults []R
|
||||
page := 1
|
||||
|
||||
for {
|
||||
items, resp, err := fetcher.Fetch(ctx, page)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch %s: %w", config.ResourceName, err)
|
||||
}
|
||||
|
||||
c.progress(fmt.Sprintf(" Fetching %s page %d (%d %s so far)...",
|
||||
config.ResourceName, page, len(allResults), config.ResourceName))
|
||||
|
||||
itemsInPage := 0
|
||||
for i, item := range items {
|
||||
// Skip items that should be filtered out entirely
|
||||
if fetcher.ShouldSkip(item) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Apply date filtering
|
||||
if fetcher.Filter(item) != DateInclude {
|
||||
continue
|
||||
}
|
||||
|
||||
// Enrich the item (this may make additional API calls)
|
||||
enriched, err := fetcher.EnrichFn(ctx, item)
|
||||
if err != nil {
|
||||
c.progress(fmt.Sprintf(" Warning: failed to enrich item: %v", err))
|
||||
continue
|
||||
}
|
||||
|
||||
allResults = append(allResults, enriched)
|
||||
itemsInPage++
|
||||
|
||||
// Progress reporting
|
||||
if progressEvery > 0 && (i+1)%progressEvery == 0 {
|
||||
c.progress(fmt.Sprintf(" Processing item %d/%d on page %d...", i+1, len(items), page))
|
||||
}
|
||||
}
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
page = resp.NextPage
|
||||
}
|
||||
|
||||
// Cache results
|
||||
if cacheKey != "" {
|
||||
c.cache.Set(cacheKey, allResults)
|
||||
}
|
||||
|
||||
return allResults, nil
|
||||
}
|
||||
Reference in New Issue
Block a user