Initial commit.

This commit is contained in:
2025-12-10 21:09:25 +00:00
commit 9d4de0e6b6
73 changed files with 15219 additions and 0 deletions
+217
View File
@@ -0,0 +1,217 @@
package cache
import (
"crypto/sha256"
"encoding/gob"
"encoding/hex"
"os"
"path/filepath"
"sync"
"time"
)
// Cache defines the interface for caching
type Cache interface {
Get(key string) (interface{}, bool)
Set(key string, value interface{})
Delete(key string)
Clear() error
}
// FileCache implements file-based caching
type FileCache struct {
directory string
ttl time.Duration
mu sync.RWMutex
}
// cacheEntry wraps a cached value with expiration
type cacheEntry struct {
Value interface{}
ExpiresAt time.Time
}
// NewFileCache creates a new file-based cache
func NewFileCache(directory string, ttl time.Duration) (*FileCache, error) {
// Create directory if it doesn't exist
if err := os.MkdirAll(directory, 0750); err != nil {
return nil, err
}
return &FileCache{
directory: directory,
ttl: ttl,
}, nil
}
// Get retrieves a value from the cache
func (c *FileCache) Get(key string) (interface{}, bool) {
c.mu.RLock()
defer c.mu.RUnlock()
path := c.keyToPath(key)
file, err := os.Open(path) // #nosec G304 -- path is internally generated hash
if err != nil {
return nil, false
}
defer file.Close()
var entry cacheEntry
decoder := gob.NewDecoder(file)
if err := decoder.Decode(&entry); err != nil {
return nil, false
}
// Check expiration
if time.Now().After(entry.ExpiresAt) {
_ = os.Remove(path)
return nil, false
}
return entry.Value, true
}
// Set stores a value in the cache
func (c *FileCache) Set(key string, value interface{}) {
c.mu.Lock()
defer c.mu.Unlock()
entry := cacheEntry{
Value: value,
ExpiresAt: time.Now().Add(c.ttl),
}
path := c.keyToPath(key)
// Ensure parent directory exists
if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil {
return
}
file, err := os.Create(path) // #nosec G304 -- path is internally generated hash
if err != nil {
return
}
defer file.Close()
encoder := gob.NewEncoder(file)
_ = encoder.Encode(entry)
}
// Delete removes a value from the cache
func (c *FileCache) Delete(key string) {
c.mu.Lock()
defer c.mu.Unlock()
path := c.keyToPath(key)
_ = os.Remove(path)
}
// Clear removes all cached values
func (c *FileCache) Clear() error {
c.mu.Lock()
defer c.mu.Unlock()
return os.RemoveAll(c.directory)
}
// keyToPath converts a cache key to a file path
func (c *FileCache) keyToPath(key string) string {
hash := sha256.Sum256([]byte(key))
filename := hex.EncodeToString(hash[:8]) + ".gob"
return filepath.Join(c.directory, filename)
}
// NoopCache is a cache that doesn't cache anything
type NoopCache struct{}
// NewNoopCache creates a new no-op cache
func NewNoopCache() *NoopCache {
return &NoopCache{}
}
// Get always returns false
func (c *NoopCache) Get(key string) (interface{}, bool) {
return nil, false
}
// Set does nothing
func (c *NoopCache) Set(key string, value interface{}) {}
// Delete does nothing
func (c *NoopCache) Delete(key string) {}
// Clear does nothing
func (c *NoopCache) Clear() error {
return nil
}
// MemoryCache implements in-memory caching (useful for testing)
type MemoryCache struct {
data map[string]cacheEntry
ttl time.Duration
mu sync.RWMutex
}
// NewMemoryCache creates a new in-memory cache
func NewMemoryCache(ttl time.Duration) *MemoryCache {
return &MemoryCache{
data: make(map[string]cacheEntry),
ttl: ttl,
}
}
// Get retrieves a value from the cache
func (c *MemoryCache) Get(key string) (interface{}, bool) {
c.mu.RLock()
defer c.mu.RUnlock()
entry, ok := c.data[key]
if !ok {
return nil, false
}
// Check expiration
if time.Now().After(entry.ExpiresAt) {
delete(c.data, key)
return nil, false
}
return entry.Value, true
}
// Set stores a value in the cache
func (c *MemoryCache) Set(key string, value interface{}) {
c.mu.Lock()
defer c.mu.Unlock()
c.data[key] = cacheEntry{
Value: value,
ExpiresAt: time.Now().Add(c.ttl),
}
}
// Delete removes a value from the cache
func (c *MemoryCache) Delete(key string) {
c.mu.Lock()
defer c.mu.Unlock()
delete(c.data, key)
}
// Clear removes all cached values
func (c *MemoryCache) Clear() error {
c.mu.Lock()
defer c.mu.Unlock()
c.data = make(map[string]cacheEntry)
return nil
}
// Register types for gob encoding
func init() {
// Register common types that might be cached
gob.Register([]interface{}{})
gob.Register(map[string]interface{}{})
}
+290
View File
@@ -0,0 +1,290 @@
package cache
import (
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFileCache_Basic(t *testing.T) {
// Create temp directory for cache
tempDir := t.TempDir()
cache, err := NewFileCache(tempDir, time.Hour)
require.NoError(t, err)
// Test Set and Get
cache.Set("test-key", "test-value")
value, ok := cache.Get("test-key")
assert.True(t, ok)
assert.Equal(t, "test-value", value)
}
func TestFileCache_GetNonExistent(t *testing.T) {
tempDir := t.TempDir()
cache, err := NewFileCache(tempDir, time.Hour)
require.NoError(t, err)
value, ok := cache.Get("non-existent")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestFileCache_Expiration(t *testing.T) {
tempDir := t.TempDir()
// Use a very short TTL
cache, err := NewFileCache(tempDir, 50*time.Millisecond)
require.NoError(t, err)
cache.Set("expire-key", "expire-value")
// Should be available immediately
value, ok := cache.Get("expire-key")
assert.True(t, ok)
assert.Equal(t, "expire-value", value)
// Wait for expiration
time.Sleep(100 * time.Millisecond)
// Should be expired now
value, ok = cache.Get("expire-key")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestFileCache_Delete(t *testing.T) {
tempDir := t.TempDir()
cache, err := NewFileCache(tempDir, time.Hour)
require.NoError(t, err)
cache.Set("delete-key", "delete-value")
// Verify it exists
_, ok := cache.Get("delete-key")
assert.True(t, ok)
// Delete it
cache.Delete("delete-key")
// Should be gone
value, ok := cache.Get("delete-key")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestFileCache_Clear(t *testing.T) {
tempDir := t.TempDir()
cache, err := NewFileCache(tempDir, time.Hour)
require.NoError(t, err)
// Add multiple entries
cache.Set("key1", "value1")
cache.Set("key2", "value2")
cache.Set("key3", "value3")
// Clear the cache
err = cache.Clear()
require.NoError(t, err)
// All should be gone
_, ok := cache.Get("key1")
assert.False(t, ok)
_, ok = cache.Get("key2")
assert.False(t, ok)
_, ok = cache.Get("key3")
assert.False(t, ok)
}
func TestFileCache_ComplexValues(t *testing.T) {
tempDir := t.TempDir()
cache, err := NewFileCache(tempDir, time.Hour)
require.NoError(t, err)
// Test with map
mapValue := map[string]interface{}{
"key1": "value1",
"key2": 123,
}
cache.Set("map-key", mapValue)
retrieved, ok := cache.Get("map-key")
assert.True(t, ok)
assert.Equal(t, mapValue, retrieved)
// Test with slice
sliceValue := []interface{}{"a", "b", "c"}
cache.Set("slice-key", sliceValue)
retrieved, ok = cache.Get("slice-key")
assert.True(t, ok)
assert.Equal(t, sliceValue, retrieved)
}
func TestFileCache_CreateDirectory(t *testing.T) {
// Test that NewFileCache creates directory if it doesn't exist
tempDir := filepath.Join(t.TempDir(), "nested", "cache", "dir")
cache, err := NewFileCache(tempDir, time.Hour)
require.NoError(t, err)
// Verify directory was created
info, err := os.Stat(tempDir)
require.NoError(t, err)
assert.True(t, info.IsDir())
// Should be usable
cache.Set("key", "value")
value, ok := cache.Get("key")
assert.True(t, ok)
assert.Equal(t, "value", value)
}
func TestMemoryCache_Basic(t *testing.T) {
t.Parallel()
cache := NewMemoryCache(time.Hour)
// Test Set and Get
cache.Set("test-key", "test-value")
value, ok := cache.Get("test-key")
assert.True(t, ok)
assert.Equal(t, "test-value", value)
}
func TestMemoryCache_GetNonExistent(t *testing.T) {
t.Parallel()
cache := NewMemoryCache(time.Hour)
value, ok := cache.Get("non-existent")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestMemoryCache_Expiration(t *testing.T) {
t.Parallel()
cache := NewMemoryCache(50 * time.Millisecond)
cache.Set("expire-key", "expire-value")
// Should be available immediately
value, ok := cache.Get("expire-key")
assert.True(t, ok)
assert.Equal(t, "expire-value", value)
// Wait for expiration
time.Sleep(100 * time.Millisecond)
// Should be expired now
value, ok = cache.Get("expire-key")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestMemoryCache_Delete(t *testing.T) {
t.Parallel()
cache := NewMemoryCache(time.Hour)
cache.Set("delete-key", "delete-value")
// Verify it exists
_, ok := cache.Get("delete-key")
assert.True(t, ok)
// Delete it
cache.Delete("delete-key")
// Should be gone
value, ok := cache.Get("delete-key")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestMemoryCache_Clear(t *testing.T) {
t.Parallel()
cache := NewMemoryCache(time.Hour)
// Add multiple entries
cache.Set("key1", "value1")
cache.Set("key2", "value2")
cache.Set("key3", "value3")
// Clear the cache
err := cache.Clear()
require.NoError(t, err)
// All should be gone
_, ok := cache.Get("key1")
assert.False(t, ok)
_, ok = cache.Get("key2")
assert.False(t, ok)
_, ok = cache.Get("key3")
assert.False(t, ok)
}
func TestNoopCache_AlwaysReturnsFalse(t *testing.T) {
t.Parallel()
cache := NewNoopCache()
// Set something
cache.Set("key", "value")
// Get should return false
value, ok := cache.Get("key")
assert.False(t, ok)
assert.Nil(t, value)
}
func TestNoopCache_DeleteAndClear(t *testing.T) {
t.Parallel()
cache := NewNoopCache()
// These should not panic or error
cache.Delete("key")
err := cache.Clear()
assert.NoError(t, err)
}
func TestFileCache_KeyToPath(t *testing.T) {
t.Parallel()
cache := &FileCache{directory: "/tmp/cache"}
path1 := cache.keyToPath("key1")
path2 := cache.keyToPath("key2")
path1Again := cache.keyToPath("key1")
// Different keys should produce different paths
assert.NotEqual(t, path1, path2)
// Same key should produce same path
assert.Equal(t, path1, path1Again)
// Path should end with .gob
assert.Contains(t, path1, ".gob")
}
func TestCacheInterface(t *testing.T) {
t.Parallel()
// Ensure all cache types implement the interface
var _ Cache = (*FileCache)(nil)
var _ Cache = (*MemoryCache)(nil)
var _ Cache = (*NoopCache)(nil)
}
+928
View File
@@ -0,0 +1,928 @@
package github
import (
"context"
"errors"
"fmt"
"net"
"net/http"
"strings"
"time"
"github.com/bradleyfalzon/ghinstallation/v2"
"github.com/google/go-github/v68/github"
"github.com/lukaszraczylo/git-velocity/internal/config"
"github.com/lukaszraczylo/git-velocity/internal/domain/models"
"github.com/lukaszraczylo/git-velocity/internal/github/cache"
)
// ProgressCallback is called to report progress during API operations
type ProgressCallback func(message string)
// RetryConfig holds retry settings
type RetryConfig struct {
MaxRetries int
InitialBackoff time.Duration
MaxBackoff time.Duration
}
// DefaultRetryConfig returns the default retry configuration
func DefaultRetryConfig() RetryConfig {
return RetryConfig{
MaxRetries: 3,
InitialBackoff: 1 * time.Second,
MaxBackoff: 30 * time.Second,
}
}
// Client wraps the GitHub API client with rate limiting and caching
type Client struct {
gh *github.Client
config *config.Config
cache cache.Cache
retry RetryConfig
progress ProgressCallback
}
// NewClient creates a new GitHub client with the appropriate authentication
func NewClient(ctx context.Context, cfg *config.Config) (*Client, error) {
var gh *github.Client
// Determine authentication method
if cfg.HasGithubToken() {
gh = github.NewClient(nil).WithAuthToken(cfg.Auth.GithubToken)
} else if cfg.HasGithubApp() {
// GitHub App authentication
privateKey, err := cfg.GetGithubAppPrivateKey()
if err != nil {
return nil, fmt.Errorf("failed to get GitHub App private key: %w", err)
}
itr, err := ghinstallation.New(
http.DefaultTransport,
cfg.Auth.GithubApp.AppID,
cfg.Auth.GithubApp.InstallationID,
privateKey,
)
if err != nil {
return nil, fmt.Errorf("failed to create GitHub App transport: %w", err)
}
gh = github.NewClient(&http.Client{Transport: itr})
} else {
return nil, fmt.Errorf("no authentication method configured")
}
// Initialize cache
var c cache.Cache
if cfg.Cache.Enabled {
ttl, err := cfg.GetCacheTTL()
if err != nil {
return nil, fmt.Errorf("failed to parse cache TTL: %w", err)
}
c, err = cache.NewFileCache(cfg.Cache.Directory, ttl)
if err != nil {
return nil, fmt.Errorf("failed to initialize cache: %w", err)
}
} else {
c = cache.NewNoopCache()
}
return &Client{
gh: gh,
config: cfg,
cache: c,
retry: DefaultRetryConfig(),
progress: func(string) {}, // no-op by default
}, nil
}
// SetProgressCallback sets the callback function for progress reporting
func (c *Client) SetProgressCallback(cb ProgressCallback) {
if cb != nil {
c.progress = cb
}
}
// SetRetryConfig sets the retry configuration
func (c *Client) SetRetryConfig(rc RetryConfig) {
c.retry = rc
}
// retryWithBackoff executes a function with retry logic
// - For rate limit errors: waits until the limit resets (no retry count limit)
// - For network/transient errors: uses exponential backoff with MaxRetries limit
func (c *Client) retryWithBackoff(ctx context.Context, operation string, fn func() error) error {
var lastErr error
backoff := c.retry.InitialBackoff
networkRetries := 0
for {
lastErr = fn()
if lastErr == nil {
return nil
}
// Check if error is retryable at all
if !isRetryableError(lastErr) {
return lastErr
}
c.progress(fmt.Sprintf(" %s failed: %v", operation, lastErr))
// Determine wait strategy based on error type
if resetTime := getRateLimitResetTime(lastErr); resetTime != nil {
// Rate limit error - wait until reset, no retry count limit
waitDuration := time.Until(*resetTime) + time.Second // Add 1s buffer
if waitDuration < 0 {
waitDuration = time.Second
}
c.progress(fmt.Sprintf(" Rate limit hit. Waiting until %s (%s)...", resetTime.Format("15:04:05"), waitDuration.Round(time.Second)))
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(waitDuration):
}
// Reset network retry counter after successful rate limit wait
networkRetries = 0
backoff = c.retry.InitialBackoff
} else {
// Network/transient error - use exponential backoff with retry limit
networkRetries++
if networkRetries > c.retry.MaxRetries {
return fmt.Errorf("%s failed after %d retries: %w", operation, c.retry.MaxRetries, lastErr)
}
c.progress(fmt.Sprintf(" Retry %d/%d for %s (waiting %s)...", networkRetries, c.retry.MaxRetries, operation, backoff))
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(backoff):
}
backoff *= 2
if backoff > c.retry.MaxBackoff {
backoff = c.retry.MaxBackoff
}
}
}
}
// getRateLimitResetTime extracts the reset time from rate limit errors
func getRateLimitResetTime(err error) *time.Time {
if err == nil {
return nil
}
var rateLimitErr *github.RateLimitError
if errors.As(err, &rateLimitErr) && rateLimitErr.Rate.Reset.Time.After(time.Now()) {
t := rateLimitErr.Rate.Reset.Time
return &t
}
var abuseErr *github.AbuseRateLimitError
if errors.As(err, &abuseErr) && abuseErr.RetryAfter != nil {
t := time.Now().Add(*abuseErr.RetryAfter)
return &t
}
return nil
}
// isRetryableError checks if an error is retryable
func isRetryableError(err error) bool {
if err == nil {
return false
}
// Network errors (timeout only - Temporary() is deprecated)
var netErr net.Error
if errors.As(err, &netErr) {
return netErr.Timeout()
}
// GitHub rate limit errors
var rateLimitErr *github.RateLimitError
if errors.As(err, &rateLimitErr) {
return true
}
// GitHub abuse rate limit errors
var abuseErr *github.AbuseRateLimitError
if errors.As(err, &abuseErr) {
return true
}
// Check error message for common transient errors
errStr := err.Error()
retryableMessages := []string{
"connection reset",
"connection refused",
"timeout",
"temporary failure",
"server error",
"502",
"503",
"504",
}
for _, msg := range retryableMessages {
if strings.Contains(strings.ToLower(errStr), msg) {
return true
}
}
return false
}
// ListOrgRepos lists repositories in an organization matching a pattern
func (c *Client) ListOrgRepos(ctx context.Context, org, pattern string) ([]string, error) {
var allRepos []string
opts := &github.RepositoryListByOrgOptions{
Type: "all",
ListOptions: github.ListOptions{
PerPage: 100,
},
}
for {
repos, resp, err := c.gh.Repositories.ListByOrg(ctx, org, opts)
if err != nil {
return nil, fmt.Errorf("failed to list org repos: %w", err)
}
for _, repo := range repos {
name := repo.GetName()
if matchPattern(name, pattern) {
allRepos = append(allRepos, name)
}
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
}
return allRepos, nil
}
// FetchCommits fetches commits from a repository within a date range
func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) {
cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if commits, ok := cached.([]models.Commit); ok {
c.progress(" Using cached commits data")
return commits, nil
}
}
var allCommits []models.Commit
opts := &github.CommitsListOptions{
ListOptions: github.ListOptions{PerPage: 100},
}
if since != nil {
opts.Since = *since
}
if until != nil {
opts.Until = *until
}
page := 1
for {
var commits []*github.RepositoryCommit
var resp *github.Response
err := c.retryWithBackoff(ctx, "list commits", func() error {
var err error
commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list commits: %w", err)
}
c.progress(fmt.Sprintf(" Fetching commits page %d (%d commits so far)...", page, len(allCommits)))
for i, commit := range commits {
// Fetch detailed commit info for stats
var detailed *github.RepositoryCommit
err := c.retryWithBackoff(ctx, fmt.Sprintf("get commit %s", commit.GetSHA()[:7]), func() error {
var err error
detailed, _, err = c.gh.Repositories.GetCommit(ctx, owner, repo, commit.GetSHA(), nil)
return err
})
if err != nil {
// Log and continue - we can still use basic info
c.progress(fmt.Sprintf(" Warning: failed to get commit details for %s: %v", commit.GetSHA()[:7], err))
continue
}
mc := convertCommit(detailed, owner, repo)
allCommits = append(allCommits, mc)
// Progress every 10 commits
if (i+1)%10 == 0 {
c.progress(fmt.Sprintf(" Processing commit %d/%d on page %d...", i+1, len(commits), page))
}
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
}
// Cache results
c.cache.Set(cacheKey, allCommits)
return allCommits, nil
}
// mainBranches are the branches we consider as "main" branches
var mainBranches = []string{"main", "master", "develop", "dev"}
// FetchPullRequests fetches pull requests from a repository
// Fetches PRs targeting main branches, filters by merge date
func (c *Client) FetchPullRequests(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.PullRequest, error) {
cacheKey := fmt.Sprintf("prs:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if prs, ok := cached.([]models.PullRequest); ok {
c.progress(" Using cached pull requests data")
return prs, nil
}
}
var allPRs []models.PullRequest
// Fetch PRs for each main branch separately (API supports base filter)
for _, baseBranch := range mainBranches {
prs, err := c.fetchPRsForBranch(ctx, owner, repo, baseBranch, since, until)
if err != nil {
// Branch might not exist, skip
continue
}
allPRs = append(allPRs, prs...)
}
c.progress(fmt.Sprintf(" Found %d merged PRs to main branches in date range", len(allPRs)))
// Cache results
c.cache.Set(cacheKey, allPRs)
return allPRs, nil
}
// fetchPRsForBranch fetches merged PRs for a specific base branch
func (c *Client) fetchPRsForBranch(ctx context.Context, owner, repo, baseBranch string, since, until *time.Time) ([]models.PullRequest, error) {
var branchPRs []models.PullRequest
opts := &github.PullRequestListOptions{
State: "closed",
Base: baseBranch, // Filter by base branch at API level
Sort: "updated",
Direction: "desc",
ListOptions: github.ListOptions{
PerPage: 100,
},
}
page := 1
consecutiveOldPages := 0
for {
var prs []*github.PullRequest
var resp *github.Response
err := c.retryWithBackoff(ctx, "list pull requests", func() error {
var err error
prs, resp, err = c.gh.PullRequests.List(ctx, owner, repo, opts)
return err
})
if err != nil {
return branchPRs, err
}
if page == 1 && len(prs) > 0 {
c.progress(fmt.Sprintf(" Fetching PRs for branch '%s'...", baseBranch))
}
matchedInPage := 0
oldInPage := 0
for _, pr := range prs {
// Only consider merged PRs (check MergedAt since Merged field isn't in list response)
if pr.MergedAt == nil {
continue
}
// Use merge date for filtering
mergedAt := pr.MergedAt.Time
// Skip items newer than our range
if until != nil && mergedAt.After(*until) {
continue
}
// If older than our range, track it
if since != nil && mergedAt.Before(*since) {
oldInPage++
continue
}
mpr := convertPullRequest(pr, owner, repo)
branchPRs = append(branchPRs, mpr)
matchedInPage++
}
// Early termination: if we got a page with only old PRs (or empty), increment counter
if matchedInPage == 0 && oldInPage > 0 {
consecutiveOldPages++
// Stop after 2 consecutive pages of only old PRs
if consecutiveOldPages >= 2 {
break
}
} else {
consecutiveOldPages = 0
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
}
return branchPRs, nil
}
// FetchReviews fetches reviews for a specific pull request
func (c *Client) FetchReviews(ctx context.Context, owner, repo string, prNumber int) ([]models.Review, error) {
cacheKey := fmt.Sprintf("reviews:%s/%s:%d", owner, repo, prNumber)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if reviews, ok := cached.([]models.Review); ok {
return reviews, nil
}
}
var allReviews []models.Review
opts := &github.ListOptions{PerPage: 100}
for {
var reviews []*github.PullRequestReview
var resp *github.Response
err := c.retryWithBackoff(ctx, fmt.Sprintf("list reviews for PR #%d", prNumber), func() error {
var err error
reviews, resp, err = c.gh.PullRequests.ListReviews(ctx, owner, repo, prNumber, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list reviews: %w", err)
}
for _, review := range reviews {
mr := convertReview(review, owner, repo, prNumber)
allReviews = append(allReviews, mr)
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
}
// Cache results
c.cache.Set(cacheKey, allReviews)
return allReviews, nil
}
// FetchIssues fetches issues from a repository
// Uses early termination when sorted by date - stops when items are outside date range
func (c *Client) FetchIssues(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Issue, error) {
cacheKey := fmt.Sprintf("issues:%s/%s:%v:%v", owner, repo, since, until)
// Check cache
if cached, ok := c.cache.Get(cacheKey); ok {
if issues, ok := cached.([]models.Issue); ok {
c.progress(" Using cached issues data")
return issues, nil
}
}
var allIssues []models.Issue
// Sort by created date descending - newest first
// This allows us to stop early when we hit items older than our date range
opts := &github.IssueListByRepoOptions{
State: "all",
Sort: "created",
Direction: "desc",
ListOptions: github.ListOptions{
PerPage: 100,
},
}
// Note: GitHub Issues API has a 'since' parameter but it filters by update time, not created time
// So we use our own filtering with early termination for better control
page := 1
reachedOldItems := false
for {
var issues []*github.Issue
var resp *github.Response
err := c.retryWithBackoff(ctx, "list issues", func() error {
var err error
issues, resp, err = c.gh.Issues.ListByRepo(ctx, owner, repo, opts)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to list issues: %w", err)
}
c.progress(fmt.Sprintf(" Fetching issues page %d (%d issues so far)...", page, len(allIssues)))
oldItemsInPage := 0
totalNonPRItems := 0
for _, issue := range issues {
// Skip pull requests (they appear in issues API)
if issue.PullRequestLinks != nil {
continue
}
totalNonPRItems++
createdAt := issue.GetCreatedAt().Time
// Skip items newer than our range (when until is specified)
if until != nil && createdAt.After(*until) {
continue
}
// If we've gone past our date range (older than since), count it
if since != nil && createdAt.Before(*since) {
oldItemsInPage++
continue
}
mi := convertIssue(issue, owner, repo)
allIssues = append(allIssues, mi)
}
// If all non-PR items in this page are older than our range, we can stop
// (since results are sorted by created date descending)
if oldItemsInPage == totalNonPRItems && totalNonPRItems > 0 {
c.progress(fmt.Sprintf(" Reached issues older than date range, stopping early (page %d)", page))
reachedOldItems = true
break
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
page++
}
if !reachedOldItems && page > 1 {
c.progress(fmt.Sprintf(" Fetched all %d pages of issues", page))
}
// Cache results
c.cache.Set(cacheKey, allIssues)
return allIssues, nil
}
// UserProfile contains GitHub user profile information useful for deduplication
type UserProfile struct {
ID int64 // GitHub user ID
Login string // GitHub username
Name string // Display name
Email string // Public email (may be empty)
AvatarURL string
}
// FetchUserProfiles fetches GitHub profiles for a list of logins
// This is useful for deduplication by getting user IDs, names, and public emails
func (c *Client) FetchUserProfiles(ctx context.Context, logins []string) (map[string]UserProfile, error) {
profiles := make(map[string]UserProfile)
// Use semaphore to limit concurrent requests
sem := make(chan struct{}, 10)
results := make(chan struct {
login string
profile UserProfile
err error
}, len(logins))
for _, login := range logins {
go func(login string) {
sem <- struct{}{}
defer func() { <-sem }()
cacheKey := fmt.Sprintf("user_profile_%s", login)
if cached, ok := c.cache.Get(cacheKey); ok {
if profile, ok := cached.(UserProfile); ok {
results <- struct {
login string
profile UserProfile
err error
}{login, profile, nil}
return
}
}
var profile UserProfile
err := c.retryWithBackoff(ctx, "fetch user profile", func() error {
user, _, err := c.gh.Users.Get(ctx, login)
if err != nil {
return err
}
profile = UserProfile{
ID: user.GetID(),
Login: user.GetLogin(),
Name: user.GetName(),
Email: user.GetEmail(),
AvatarURL: user.GetAvatarURL(),
}
return nil
})
if err == nil {
c.cache.Set(cacheKey, profile)
}
results <- struct {
login string
profile UserProfile
err error
}{login, profile, err}
}(login)
}
// Collect results
for range logins {
r := <-results
if r.err == nil {
profiles[r.login] = r.profile
}
}
return profiles, nil
}
// Helper functions
func convertCommit(c *github.RepositoryCommit, owner, repo string) models.Commit {
var author models.Author
if c.Author != nil {
author = models.Author{
Login: c.Author.GetLogin(),
AvatarURL: c.Author.GetAvatarURL(),
}
}
if c.Commit != nil && c.Commit.Author != nil {
author.Name = c.Commit.Author.GetName()
author.Email = c.Commit.Author.GetEmail()
}
var committer models.Author
if c.Committer != nil {
committer = models.Author{
Login: c.Committer.GetLogin(),
AvatarURL: c.Committer.GetAvatarURL(),
}
}
if c.Commit != nil && c.Commit.Committer != nil {
committer.Name = c.Commit.Committer.GetName()
committer.Email = c.Commit.Committer.GetEmail()
}
var date time.Time
if c.Commit != nil && c.Commit.Author != nil {
date = c.Commit.Author.GetDate().Time
}
var additions, deletions, filesChanged int
if c.Stats != nil {
additions = c.Stats.GetAdditions()
deletions = c.Stats.GetDeletions()
}
filesChanged = len(c.Files)
// Detect if commit includes tests
hasTests := false
for _, f := range c.Files {
filename := f.GetFilename()
if strings.Contains(filename, "_test.go") ||
strings.Contains(filename, ".test.") ||
strings.Contains(filename, ".spec.") ||
strings.Contains(filename, "/tests/") ||
strings.Contains(filename, "/test/") ||
strings.Contains(filename, "__tests__") {
hasTests = true
break
}
}
message := ""
if c.Commit != nil {
message = c.Commit.GetMessage()
}
return models.Commit{
SHA: c.GetSHA(),
Message: message,
Author: author,
Committer: committer,
Date: date,
Additions: additions,
Deletions: deletions,
FilesChanged: filesChanged,
Repository: fmt.Sprintf("%s/%s", owner, repo),
URL: c.GetHTMLURL(),
HasTests: hasTests,
}
}
func convertPullRequest(pr *github.PullRequest, owner, repo string) models.PullRequest {
var author models.Author
if pr.User != nil {
author = models.Author{
ID: pr.User.GetID(),
Login: pr.User.GetLogin(),
Name: pr.User.GetName(),
AvatarURL: pr.User.GetAvatarURL(),
}
}
state := models.PRStateOpen
if pr.GetMerged() {
state = models.PRStateMerged
} else if pr.GetState() == "closed" {
state = models.PRStateClosed
}
var mergedAt, closedAt *time.Time
if pr.MergedAt != nil {
t := pr.MergedAt.Time
mergedAt = &t
}
if pr.ClosedAt != nil {
t := pr.ClosedAt.Time
closedAt = &t
}
var baseBranch, headBranch string
if pr.Base != nil {
baseBranch = pr.Base.GetRef()
}
if pr.Head != nil {
headBranch = pr.Head.GetRef()
}
return models.PullRequest{
Number: pr.GetNumber(),
Title: pr.GetTitle(),
State: state,
Author: author,
Repository: fmt.Sprintf("%s/%s", owner, repo),
BaseBranch: baseBranch,
HeadBranch: headBranch,
CreatedAt: pr.GetCreatedAt().Time,
UpdatedAt: pr.GetUpdatedAt().Time,
MergedAt: mergedAt,
ClosedAt: closedAt,
Additions: pr.GetAdditions(),
Deletions: pr.GetDeletions(),
FilesChanged: pr.GetChangedFiles(),
CommitCount: pr.GetCommits(),
Comments: pr.GetComments() + pr.GetReviewComments(),
URL: pr.GetHTMLURL(),
}
}
func convertReview(r *github.PullRequestReview, owner, repo string, prNumber int) models.Review {
var author models.Author
if r.User != nil {
author = models.Author{
ID: r.User.GetID(),
Login: r.User.GetLogin(),
Name: r.User.GetName(),
AvatarURL: r.User.GetAvatarURL(),
}
}
state := models.ReviewState(r.GetState())
submittedAt := time.Time{}
if r.SubmittedAt != nil {
submittedAt = r.SubmittedAt.Time
}
return models.Review{
ID: r.GetID(),
PullRequest: prNumber,
Repository: fmt.Sprintf("%s/%s", owner, repo),
Author: author,
State: state,
SubmittedAt: submittedAt,
Body: r.GetBody(),
}
}
func convertIssue(i *github.Issue, owner, repo string) models.Issue {
var author models.Author
if i.User != nil {
author = models.Author{
Login: i.User.GetLogin(),
Name: i.User.GetName(),
AvatarURL: i.User.GetAvatarURL(),
}
}
state := models.IssueStateOpen
if i.GetState() == "closed" {
state = models.IssueStateClosed
}
var closedAt *time.Time
var closedBy *models.Author
if i.ClosedAt != nil {
t := i.ClosedAt.Time
closedAt = &t
}
if i.ClosedBy != nil {
cb := models.Author{
Login: i.ClosedBy.GetLogin(),
AvatarURL: i.ClosedBy.GetAvatarURL(),
}
closedBy = &cb
}
var labels []string
for _, l := range i.Labels {
labels = append(labels, l.GetName())
}
return models.Issue{
Number: i.GetNumber(),
Title: i.GetTitle(),
State: state,
Author: author,
Repository: fmt.Sprintf("%s/%s", owner, repo),
CreatedAt: i.GetCreatedAt().Time,
UpdatedAt: i.GetUpdatedAt().Time,
ClosedAt: closedAt,
ClosedBy: closedBy,
Comments: i.GetComments(),
Labels: labels,
URL: i.GetHTMLURL(),
}
}
// matchPattern performs simple glob-style pattern matching
func matchPattern(s, pattern string) bool {
if pattern == "*" {
return true
}
// Handle exact match
if !strings.Contains(pattern, "*") {
return s == pattern
}
// Handle prefix match (pattern*)
if strings.HasSuffix(pattern, "*") && !strings.HasPrefix(pattern, "*") {
return strings.HasPrefix(s, strings.TrimSuffix(pattern, "*"))
}
// Handle suffix match (*pattern)
if strings.HasPrefix(pattern, "*") && !strings.HasSuffix(pattern, "*") {
return strings.HasSuffix(s, strings.TrimPrefix(pattern, "*"))
}
// Handle contains match (*pattern*)
if strings.HasPrefix(pattern, "*") && strings.HasSuffix(pattern, "*") {
inner := strings.TrimPrefix(strings.TrimSuffix(pattern, "*"), "*")
return strings.Contains(s, inner)
}
return false
}