Add limits on the git clone based on the start date.

This commit is contained in:
2025-12-11 22:04:18 +00:00
parent 53b1301404
commit 2f88b1a30a
4 changed files with 92 additions and 4 deletions
+18 -1
View File
@@ -170,7 +170,24 @@ func (a *App) collectRepoData(ctx context.Context, owner, name string, dateRange
if a.gitRepo != nil {
// Clone/update repository locally
token := a.config.Auth.GithubToken
cloneErr := a.gitRepo.EnsureCloned(ctx, owner, name, token)
// Determine clone options (shallow clone if enabled)
var cloneOpts *git.CloneOptions
if a.config.Options.ShallowClone && dateRange.Start != nil {
// Get commit count since start date to determine shallow clone depth
commitCount, countErr := a.client.GetCommitCountSince(ctx, owner, name, *dateRange.Start)
if countErr != nil {
a.log(" Warning: failed to get commit count for shallow clone: %v", countErr)
// Proceed with full clone
} else if commitCount > 0 {
// Add buffer for safety margin
depth := commitCount + a.config.Options.ShallowCloneBuffer
cloneOpts = &git.CloneOptions{Depth: depth}
a.log(" Using shallow clone (depth: %d = %d commits + %d buffer)", depth, commitCount, a.config.Options.ShallowCloneBuffer)
}
}
cloneErr := a.gitRepo.EnsureClonedWithOptions(ctx, owner, name, token, cloneOpts)
if cloneErr != nil {
a.log(" Warning: failed to clone repository locally, falling back to API: %v", cloneErr)
// Fallback to API
+4
View File
@@ -150,6 +150,8 @@ type OptionsConfig struct {
AdditionalBotPatterns []string `yaml:"additional_bot_patterns"` // User-defined patterns (added to hardcoded defaults)
CloneDirectory string `yaml:"clone_directory"` // Directory for local git clones
UseLocalGit bool `yaml:"use_local_git"` // Use local git for commits (faster)
ShallowClone bool `yaml:"shallow_clone"` // Use shallow clone based on date range (faster cloning)
ShallowCloneBuffer int `yaml:"shallow_clone_buffer"` // Extra commits to fetch beyond date range (default: 25)
UserAliases []UserAlias `yaml:"user_aliases,omitempty"` // Manual email/name to login mappings
}
@@ -229,6 +231,8 @@ func DefaultConfig() *Config {
AdditionalBotPatterns: []string{}, // Users can add custom patterns here
CloneDirectory: "./.repos",
UseLocalGit: true, // Default to faster local git analysis
ShallowClone: true, // Default to shallow clone for faster cloning
ShallowCloneBuffer: 25, // Extra commits beyond date range for safety margin
},
}
}
+23 -3
View File
@@ -52,8 +52,19 @@ func (r *Repository) repoPath(owner, name string) string {
return filepath.Join(r.baseDir, owner, name)
}
// CloneOptions carries the optional settings applied when a repository is
// cloned. A nil *CloneOptions is accepted by the clone helpers and requests
// no special behaviour.
type CloneOptions struct {
	// Depth is the number of commits to limit the clone to. It is only
	// applied when greater than zero; the zero value means a full clone.
	Depth int
}
// EnsureCloned makes sure the owner/name repository is available locally.
// It is a convenience wrapper around EnsureClonedWithOptions that passes nil
// options, so no clone depth is requested.
func (r *Repository) EnsureCloned(ctx context.Context, owner, name, token string) error {
	// nil options: defer entirely to the default clone behaviour.
	err := r.EnsureClonedWithOptions(ctx, owner, name, token, nil)
	return err
}
// EnsureClonedWithOptions ensures a repository is cloned with specific options
func (r *Repository) EnsureClonedWithOptions(ctx context.Context, owner, name, token string, opts *CloneOptions) error {
repoPath := r.repoPath(owner, name)
// Check if already cloned
@@ -65,12 +76,16 @@ func (r *Repository) EnsureCloned(ctx context.Context, owner, name, token string
}
// Clone the repository
r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
return r.clone(ctx, owner, name, token, repoPath)
if opts != nil && opts.Depth > 0 {
r.progress(fmt.Sprintf(" Shallow cloning %s/%s (depth: %d)...", owner, name, opts.Depth))
} else {
r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
}
return r.clone(ctx, owner, name, token, repoPath, opts)
}
// clone clones a repository using go-git
func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string) error {
func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string, opts *CloneOptions) error {
// Create parent directory
if err := os.MkdirAll(filepath.Dir(destPath), 0750); err != nil {
return fmt.Errorf("failed to create parent directory: %w", err)
@@ -83,6 +98,11 @@ func (r *Repository) clone(ctx context.Context, owner, name, token, destPath str
Progress: nil, // Could add progress writer here
}
// Apply shallow clone depth if provided
if opts != nil && opts.Depth > 0 {
cloneOpts.Depth = opts.Depth
}
// Add authentication if token provided
if token != "" {
cloneOpts.Auth = &http.BasicAuth{
+47
View File
@@ -272,6 +272,53 @@ func (c *Client) ListOrgRepos(ctx context.Context, org, pattern string) ([]strin
return allRepos, nil
}
// GetCommitCountSince returns the approximate number of commits since a given date.
// This is used to determine the optimal shallow clone depth.
//
// It makes a single lightweight API call with per_page=1. With one commit per
// page, the last page number reported in the response's pagination info equals
// the number of matching commits. When the response reports neither a last nor
// a next page, the whole result fit on the page that was fetched, so the
// commits returned by that same call are counted directly instead of issuing a
// second request.
//
// An error is returned when the request fails, when no response is available,
// or when the response advertises further pages without a last page (the total
// cannot be derived then, and guessing low would produce a too-shallow depth).
func (c *Client) GetCommitCountSince(ctx context.Context, owner, repo string, since time.Time) (int, error) {
	opts := &github.CommitsListOptions{
		Since: since,
		ListOptions: github.ListOptions{
			PerPage: 1,
		},
	}

	var (
		commits []*github.RepositoryCommit
		resp    *github.Response
	)
	err := c.retryWithBackoff(ctx, "get commit count", func() error {
		var err error
		commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
		return err
	})
	if err != nil {
		return 0, fmt.Errorf("failed to get commit count: %w", err)
	}
	if resp == nil {
		// Defensive: without a response there is no pagination info to read.
		return 0, fmt.Errorf("failed to get commit count: no response returned")
	}

	switch {
	case resp.LastPage > 0:
		// LastPage indicates total number of pages (with 1 item per page = total commits).
		return resp.LastPage, nil
	case resp.NextPage > 0:
		// More results exist but the total is unknown; report it rather than
		// under-counting so the caller can decide how to proceed.
		return 0, fmt.Errorf("failed to get commit count: response has a next page but no last page")
	default:
		// Single page: the fetched slice already holds every matching commit
		// (zero or one, given per_page=1).
		return len(commits), nil
	}
}
// FetchCommits fetches commits from a repository within a date range
func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) {
cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until)