diff --git a/internal/app/app.go b/internal/app/app.go
index cf0b256..f3c7f8b 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -170,7 +170,24 @@ func (a *App) collectRepoData(ctx context.Context, owner, name string, dateRange
 	if a.gitRepo != nil {
 		// Clone/update repository locally
 		token := a.config.Auth.GithubToken
-		cloneErr := a.gitRepo.EnsureCloned(ctx, owner, name, token)
+
+		// Determine clone options (shallow clone if enabled)
+		var cloneOpts *git.CloneOptions
+		if a.config.Options.ShallowClone && dateRange.Start != nil {
+			// Get commit count since start date to determine shallow clone depth
+			commitCount, countErr := a.client.GetCommitCountSince(ctx, owner, name, *dateRange.Start)
+			if countErr != nil {
+				a.log(" Warning: failed to get commit count for shallow clone: %v", countErr)
+				// Proceed with full clone
+			} else if commitCount > 0 {
+				// Add buffer for safety margin
+				depth := commitCount + a.config.Options.ShallowCloneBuffer
+				cloneOpts = &git.CloneOptions{Depth: depth}
+				a.log(" Using shallow clone (depth: %d = %d commits + %d buffer)", depth, commitCount, a.config.Options.ShallowCloneBuffer)
+			}
+		}
+
+		cloneErr := a.gitRepo.EnsureClonedWithOptions(ctx, owner, name, token, cloneOpts)
 		if cloneErr != nil {
 			a.log(" Warning: failed to clone repository locally, falling back to API: %v", cloneErr)
 			// Fallback to API
diff --git a/internal/config/schema.go b/internal/config/schema.go
index 7b85dd4..2b49563 100644
--- a/internal/config/schema.go
+++ b/internal/config/schema.go
@@ -150,6 +150,8 @@ type OptionsConfig struct {
 	AdditionalBotPatterns []string `yaml:"additional_bot_patterns"` // User-defined patterns (added to hardcoded defaults)
 	CloneDirectory string `yaml:"clone_directory"` // Directory for local git clones
 	UseLocalGit bool `yaml:"use_local_git"` // Use local git for commits (faster)
+	ShallowClone bool `yaml:"shallow_clone"` // Use shallow clone based on date range (faster cloning)
+	ShallowCloneBuffer int `yaml:"shallow_clone_buffer"` // Extra commits to fetch beyond date range (default: 25)
 	UserAliases []UserAlias `yaml:"user_aliases,omitempty"` // Manual email/name to login mappings
 }
 
@@ -229,6 +231,8 @@ func DefaultConfig() *Config {
 			AdditionalBotPatterns: []string{}, // Users can add custom patterns here
 			CloneDirectory: "./.repos",
 			UseLocalGit: true, // Default to faster local git analysis
+			ShallowClone: true, // Default to shallow clone for faster cloning
+			ShallowCloneBuffer: 25, // Extra commits beyond date range for safety margin
 		},
 	}
 }
diff --git a/internal/git/repository.go b/internal/git/repository.go
index caf2d14..5ce9796 100644
--- a/internal/git/repository.go
+++ b/internal/git/repository.go
@@ -52,8 +52,19 @@ func (r *Repository) repoPath(owner, name string) string {
 	return filepath.Join(r.baseDir, owner, name)
 }
 
+// CloneOptions contains options for cloning a repository
+type CloneOptions struct {
+	// Depth limits the clone to the specified number of commits (0 = full clone)
+	Depth int
+}
+
 // EnsureCloned ensures a repository is cloned and up to date
 func (r *Repository) EnsureCloned(ctx context.Context, owner, name, token string) error {
+	return r.EnsureClonedWithOptions(ctx, owner, name, token, nil)
+}
+
+// EnsureClonedWithOptions ensures a repository is cloned with specific options
+func (r *Repository) EnsureClonedWithOptions(ctx context.Context, owner, name, token string, opts *CloneOptions) error {
 	repoPath := r.repoPath(owner, name)
 
 	// Check if already cloned
@@ -65,12 +76,16 @@ func (r *Repository) EnsureCloned(ctx context.Context, owner, name, token string
 	}
 
 	// Clone the repository
-	r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
-	return r.clone(ctx, owner, name, token, repoPath)
+	if opts != nil && opts.Depth > 0 {
+		r.progress(fmt.Sprintf(" Shallow cloning %s/%s (depth: %d)...", owner, name, opts.Depth))
+	} else {
+		r.progress(fmt.Sprintf(" Cloning %s/%s...", owner, name))
+	}
+	return r.clone(ctx, owner, name, token, repoPath, opts)
 }
 
 // clone clones a repository using go-git
-func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string) error {
+func (r *Repository) clone(ctx context.Context, owner, name, token, destPath string, opts *CloneOptions) error {
 	// Create parent directory
 	if err := os.MkdirAll(filepath.Dir(destPath), 0750); err != nil {
 		return fmt.Errorf("failed to create parent directory: %w", err)
@@ -83,6 +98,11 @@ func (r *Repository) clone(ctx context.Context, owner, name, token, destPath str
 		Progress: nil, // Could add progress writer here
 	}
 
+	// Apply shallow clone depth if provided
+	if opts != nil && opts.Depth > 0 {
+		cloneOpts.Depth = opts.Depth
+	}
+
 	// Add authentication if token provided
 	if token != "" {
 		cloneOpts.Auth = &http.BasicAuth{
diff --git a/internal/github/client.go b/internal/github/client.go
index 49df5bd..f4a6ced 100644
--- a/internal/github/client.go
+++ b/internal/github/client.go
@@ -272,6 +272,50 @@ func (c *Client) ListOrgRepos(ctx context.Context, org, pattern string) ([]strin
 	return allRepos, nil
 }
 
+// GetCommitCountSince returns the approximate number of commits since a given date.
+// This is used to determine the optimal shallow clone depth.
+// It makes a single lightweight API call with per_page=1: with one commit per
+// page, the number of the last page in the pagination info is the commit count.
+// An error is returned when the count cannot be determined, so callers can fall
+// back to a full clone.
+func (c *Client) GetCommitCountSince(ctx context.Context, owner, repo string, since time.Time) (int, error) {
+	opts := &github.CommitsListOptions{
+		Since: since,
+		ListOptions: github.ListOptions{
+			PerPage: 1,
+		},
+	}
+
+	var (
+		commits []*github.RepositoryCommit
+		resp    *github.Response
+	)
+	err := c.retryWithBackoff(ctx, "get commit count", func() error {
+		var err error
+		commits, resp, err = c.gh.Repositories.ListCommits(ctx, owner, repo, opts)
+		return err
+	})
+	if err != nil {
+		return 0, fmt.Errorf("failed to get commit count: %w", err)
+	}
+
+	if resp != nil {
+		// LastPage indicates total number of pages (with 1 item per page = total commits)
+		if resp.LastPage > 0 {
+			return resp.LastPage, nil
+		}
+		// A next page without a last page means the total is unknown; reporting a
+		// guess here would make the shallow clone too shallow.
+		if resp.NextPage > 0 {
+			return 0, fmt.Errorf("failed to get commit count: pagination info has a next page but no last page")
+		}
+	}
+
+	// No further pages: the page already fetched holds every matching commit
+	// (zero or one with per_page=1), so no second request is needed.
+	return len(commits), nil
+}
+
 // FetchCommits fetches commits from a repository within a date range
 func (c *Client) FetchCommits(ctx context.Context, owner, repo string, since, until *time.Time) ([]models.Commit, error) {
 	cacheKey := fmt.Sprintf("commits:%s/%s:%v:%v", owner, repo, since, until)