This commit is contained in:
2026-01-02 17:31:03 +00:00
parent e6edf654b9
commit 1f6594d1e3
31 changed files with 3459 additions and 51 deletions
+22 -2
View File
@@ -30,6 +30,7 @@ import (
"github.com/lukaszraczylo/gohoarder/pkg/scanner"
"github.com/lukaszraczylo/gohoarder/pkg/storage"
"github.com/lukaszraczylo/gohoarder/pkg/storage/filesystem"
"github.com/lukaszraczylo/gohoarder/pkg/vcs"
"github.com/lukaszraczylo/gohoarder/pkg/websocket"
"github.com/rs/zerolog/log"
)
@@ -251,9 +252,28 @@ func (a *App) setupServer() error {
a.app.All("/api/admin/bypasses/:id?", a.handleAdminBypasses)
// Proxy handlers (adapted from net/http)
// Load git credentials if configured
var credStore *vcs.CredentialStore
if a.config.Handlers.Go.GitCredentialsFile != "" {
credStore = vcs.NewCredentialStore()
if err := credStore.LoadFromFile(a.config.Handlers.Go.GitCredentialsFile); err != nil {
log.Error().
Err(err).
Str("file", a.config.Handlers.Go.GitCredentialsFile).
Msg("Failed to load git credentials, continuing without pattern-based credentials")
} else if err := credStore.ValidateConfig(); err != nil {
log.Error().
Err(err).
Str("file", a.config.Handlers.Go.GitCredentialsFile).
Msg("Invalid git credentials configuration, continuing without pattern-based credentials")
credStore = nil
}
}
goProxyHandler := goproxy.New(a.cache, a.networkClient, goproxy.Config{
Upstream: "https://proxy.golang.org",
SumDBURL: "https://sum.golang.org",
Upstream: "https://proxy.golang.org",
SumDBURL: "https://sum.golang.org",
CredStore: credStore,
})
a.app.All("/go/*", adaptor.HTTPHandler(http.StripPrefix("/go", goProxyHandler)))
+68
View File
@@ -0,0 +1,68 @@
package auth
import (
"encoding/base64"
"net/http"
"strings"
)
// CredentialExtractor extracts authentication credentials from HTTP requests
type CredentialExtractor struct{}
// NewCredentialExtractor creates a new credential extractor
func NewCredentialExtractor() *CredentialExtractor {
return &CredentialExtractor{}
}
// Extract extracts authentication credentials from an HTTP request
// Returns the full Authorization header value or constructed auth string
func (e *CredentialExtractor) Extract(r *http.Request) string {
// Try Authorization header first (most common)
if auth := r.Header.Get("Authorization"); auth != "" {
return auth
}
// Try Basic auth from URL (for PyPI compatibility)
if username, password, ok := r.BasicAuth(); ok {
auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + password))
return "Basic " + auth
}
// No credentials found
return ""
}
// ExtractScheme returns the authentication scheme (Bearer, Basic, Token)
func (e *CredentialExtractor) ExtractScheme(r *http.Request) string {
auth := e.Extract(r)
if auth == "" {
return ""
}
parts := strings.SplitN(auth, " ", 2)
if len(parts) == 2 {
return parts[0]
}
return ""
}
// ExtractToken extracts just the token part (without scheme)
func (e *CredentialExtractor) ExtractToken(r *http.Request) string {
auth := e.Extract(r)
if auth == "" {
return ""
}
// Remove scheme prefix
auth = strings.TrimPrefix(auth, "Bearer ")
auth = strings.TrimPrefix(auth, "Token ")
auth = strings.TrimPrefix(auth, "Basic ")
return auth
}
// HasCredentials checks if request has any credentials
func (e *CredentialExtractor) HasCredentials(r *http.Request) bool {
return e.Extract(r) != ""
}
+38
View File
@@ -0,0 +1,38 @@
package auth
import (
"crypto/sha256"
"encoding/hex"
"fmt"
)
// CredentialHasher generates hashes of credentials for cache keys
type CredentialHasher struct{}
// NewCredentialHasher creates a new credential hasher
func NewCredentialHasher() *CredentialHasher {
return &CredentialHasher{}
}
// Hash generates a short hash of credentials for use in cache keys
// Returns "public" if no credentials provided
func (h *CredentialHasher) Hash(credentials string) string {
if credentials == "" {
return "public"
}
// Use SHA256 and take first 16 characters (8 bytes)
hash := sha256.Sum256([]byte(credentials))
return hex.EncodeToString(hash[:8])
}
// GenerateCacheKey generates a cache key that includes credential hash
func (h *CredentialHasher) GenerateCacheKey(registry, packageName, version, credentials string) string {
credHash := h.Hash(credentials)
return fmt.Sprintf("%s:%s:%s:%s", registry, packageName, version, credHash)
}
// IsPublicKey checks if a cache key is for public packages (no credentials)
func (h *CredentialHasher) IsPublicKey(cacheKey string) bool {
return len(cacheKey) > 0 && cacheKey[len(cacheKey)-6:] == "public"
}
+109
View File
@@ -0,0 +1,109 @@
package auth
import (
"sync"
"time"
)
// ValidationResult represents a cached credential validation result
type ValidationResult struct {
Allowed bool
ExpiresAt time.Time
Reason string
}
// ValidationCache caches credential validation results to reduce upstream checks
type ValidationCache struct {
cache map[string]*ValidationResult
mu sync.RWMutex
ttl time.Duration
}
// NewValidationCache creates a new validation cache
func NewValidationCache(ttl time.Duration) *ValidationCache {
vc := &ValidationCache{
cache: make(map[string]*ValidationResult),
ttl: ttl,
}
// Start cleanup goroutine
go vc.cleanupExpired()
return vc
}
// Get retrieves a validation result from cache
// Returns (allowed bool, cached bool, reason string)
func (vc *ValidationCache) Get(credHash, packageURL string) (bool, bool, string) {
vc.mu.RLock()
defer vc.mu.RUnlock()
key := credHash + ":" + packageURL
result, exists := vc.cache[key]
if !exists {
return false, false, ""
}
// Check if expired
if time.Now().After(result.ExpiresAt) {
return false, false, ""
}
return result.Allowed, true, result.Reason
}
// Set stores a validation result in cache
func (vc *ValidationCache) Set(credHash, packageURL string, allowed bool, reason string) {
vc.mu.Lock()
defer vc.mu.Unlock()
key := credHash + ":" + packageURL
vc.cache[key] = &ValidationResult{
Allowed: allowed,
ExpiresAt: time.Now().Add(vc.ttl),
Reason: reason,
}
}
// Invalidate removes a specific entry from cache
func (vc *ValidationCache) Invalidate(credHash, packageURL string) {
vc.mu.Lock()
defer vc.mu.Unlock()
key := credHash + ":" + packageURL
delete(vc.cache, key)
}
// InvalidateAll clears the entire cache
func (vc *ValidationCache) InvalidateAll() {
vc.mu.Lock()
defer vc.mu.Unlock()
vc.cache = make(map[string]*ValidationResult)
}
// Size returns the number of cached entries
func (vc *ValidationCache) Size() int {
vc.mu.RLock()
defer vc.mu.RUnlock()
return len(vc.cache)
}
// cleanupExpired removes expired entries periodically
func (vc *ValidationCache) cleanupExpired() {
ticker := time.NewTicker(1 * time.Minute)
defer ticker.Stop()
for range ticker.C {
vc.mu.Lock()
now := time.Now()
for key, result := range vc.cache {
if now.After(result.ExpiresAt) {
delete(vc.cache, key)
}
}
vc.mu.Unlock()
}
}
+284
View File
@@ -0,0 +1,284 @@
package auth
import (
"context"
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/rs/zerolog/log"
)
// CredentialValidator validates credentials with upstream registries
type CredentialValidator interface {
// ValidateAccess checks if credentials grant access to a package
// Returns (allowed bool, error)
ValidateAccess(ctx context.Context, packageURL string, credentials string) (bool, error)
}
// NPMValidator validates npm registry credentials
type NPMValidator struct {
client *http.Client
timeout time.Duration
}
// NewNPMValidator creates a new npm credential validator
func NewNPMValidator() *NPMValidator {
return &NPMValidator{
client: &http.Client{
Timeout: 5 * time.Second,
},
timeout: 5 * time.Second,
}
}
// ValidateAccess validates npm package access using HEAD request
func (v *NPMValidator) ValidateAccess(ctx context.Context, packageURL string, credentials string) (bool, error) {
req, err := http.NewRequestWithContext(ctx, "HEAD", packageURL, nil)
if err != nil {
return false, err
}
// Add credentials if provided
if credentials != "" {
req.Header.Set("Authorization", credentials)
}
resp, err := v.client.Do(req)
if err != nil {
// Network error - allow cache fallback with warning
log.Warn().Err(err).Str("url", packageURL).Msg("Validation request failed, allowing cache fallback")
return true, fmt.Errorf("validation failed: %w (allowing cache fallback)", err)
}
defer resp.Body.Close()
// Check status code
switch resp.StatusCode {
case 200, 304:
// Access granted
return true, nil
case 401, 403, 404:
// Access denied
return false, fmt.Errorf("access denied: HTTP %d", resp.StatusCode)
default:
// Unexpected status - allow cache fallback with warning
log.Warn().Int("status", resp.StatusCode).Str("url", packageURL).Msg("Unexpected validation status, allowing cache fallback")
return true, fmt.Errorf("unexpected status %d (allowing cache fallback)", resp.StatusCode)
}
}
// PyPIValidator validates PyPI registry credentials
type PyPIValidator struct {
client *http.Client
timeout time.Duration
}
// NewPyPIValidator creates a new PyPI credential validator
func NewPyPIValidator() *PyPIValidator {
return &PyPIValidator{
client: &http.Client{
Timeout: 5 * time.Second,
},
timeout: 5 * time.Second,
}
}
// ValidateAccess validates PyPI package access using HEAD request
func (v *PyPIValidator) ValidateAccess(ctx context.Context, packageURL string, credentials string) (bool, error) {
req, err := http.NewRequestWithContext(ctx, "HEAD", packageURL, nil)
if err != nil {
return false, err
}
// Add credentials if provided
if credentials != "" {
req.Header.Set("Authorization", credentials)
}
resp, err := v.client.Do(req)
if err != nil {
// Network error - allow cache fallback with warning
log.Warn().Err(err).Str("url", packageURL).Msg("Validation request failed, allowing cache fallback")
return true, fmt.Errorf("validation failed: %w (allowing cache fallback)", err)
}
defer resp.Body.Close()
// Check status code
switch resp.StatusCode {
case 200, 304:
// Access granted
return true, nil
case 401, 403, 404:
// Access denied
return false, fmt.Errorf("access denied: HTTP %d", resp.StatusCode)
default:
// Unexpected status - allow cache fallback with warning
log.Warn().Int("status", resp.StatusCode).Str("url", packageURL).Msg("Unexpected validation status, allowing cache fallback")
return true, fmt.Errorf("unexpected status %d (allowing cache fallback)", resp.StatusCode)
}
}
// GoValidator validates Go module credentials
type GoValidator struct {
timeout time.Duration
}
// NewGoValidator creates a new Go module credential validator
func NewGoValidator() *GoValidator {
return &GoValidator{
timeout: 10 * time.Second,
}
}
// ValidateAccess validates Go module access using git ls-remote
func (v *GoValidator) ValidateAccess(ctx context.Context, modulePath string, credentials string) (bool, error) {
// Create context with timeout
ctx, cancel := context.WithTimeout(ctx, v.timeout)
defer cancel()
// Determine repository type and validate accordingly
if strings.HasPrefix(modulePath, "github.com/") {
return v.validateGitHub(ctx, modulePath, credentials)
}
if strings.HasPrefix(modulePath, "gitlab.com/") {
return v.validateGitLab(ctx, modulePath, credentials)
}
// For other Git providers, use generic git validation
return v.validateGit(ctx, modulePath, credentials)
}
func (v *GoValidator) validateGitHub(ctx context.Context, modulePath, credentials string) (bool, error) {
// Extract token from credentials
token := strings.TrimPrefix(credentials, "Bearer ")
token = strings.TrimPrefix(token, "Token ")
if token == "" || token == credentials {
// No token provided or not in expected format
return false, fmt.Errorf("no GitHub token provided")
}
// Build git URL
repoURL := fmt.Sprintf("https://%s.git", modulePath)
// Create temporary directory for .netrc
tempDir, err := os.MkdirTemp("", "gohoarder-validate-*")
if err != nil {
return false, err
}
defer os.RemoveAll(tempDir)
// Create .netrc file with credentials
netrcPath := filepath.Join(tempDir, ".netrc")
netrcContent := fmt.Sprintf("machine github.com\nlogin oauth2\npassword %s\n", token)
if err := os.WriteFile(netrcPath, []byte(netrcContent), 0600); err != nil {
return false, err
}
// Run git ls-remote (lightweight, just checks access)
cmd := exec.CommandContext(ctx, "git", "ls-remote", repoURL, "HEAD")
cmd.Env = append(os.Environ(),
"HOME="+tempDir, // Use temp .netrc
"GIT_TERMINAL_PROMPT=0", // Disable prompts
)
output, err := cmd.CombinedOutput()
if err != nil {
// Check error message
errMsg := string(output)
if strings.Contains(errMsg, "could not read Username") ||
strings.Contains(errMsg, "Authentication failed") ||
strings.Contains(errMsg, "fatal: repository") ||
strings.Contains(errMsg, "not found") {
// Access denied
return false, fmt.Errorf("access denied: %s", strings.TrimSpace(errMsg))
}
// Other error (network, etc.) - allow cache fallback
log.Warn().Err(err).Str("module", modulePath).Msg("Git validation failed, allowing cache fallback")
return true, fmt.Errorf("validation error (allowing cache): %w", err)
}
// Success - repository accessible
return true, nil
}
func (v *GoValidator) validateGitLab(ctx context.Context, modulePath, credentials string) (bool, error) {
// Extract token from credentials
token := strings.TrimPrefix(credentials, "Bearer ")
token = strings.TrimPrefix(token, "Token ")
token = strings.TrimPrefix(token, "Private-Token ")
if token == "" || token == credentials {
// No token provided
return false, fmt.Errorf("no GitLab token provided")
}
// Build git URL
repoURL := fmt.Sprintf("https://%s.git", modulePath)
// Create temporary directory for .netrc
tempDir, err := os.MkdirTemp("", "gohoarder-validate-*")
if err != nil {
return false, err
}
defer os.RemoveAll(tempDir)
// Create .netrc file with credentials
netrcPath := filepath.Join(tempDir, ".netrc")
netrcContent := fmt.Sprintf("machine gitlab.com\nlogin oauth2\npassword %s\n", token)
if err := os.WriteFile(netrcPath, []byte(netrcContent), 0600); err != nil {
return false, err
}
// Run git ls-remote
cmd := exec.CommandContext(ctx, "git", "ls-remote", repoURL, "HEAD")
cmd.Env = append(os.Environ(),
"HOME="+tempDir,
"GIT_TERMINAL_PROMPT=0",
)
output, err := cmd.CombinedOutput()
if err != nil {
errMsg := string(output)
if strings.Contains(errMsg, "could not read Username") ||
strings.Contains(errMsg, "Authentication failed") ||
strings.Contains(errMsg, "not found") {
return false, fmt.Errorf("access denied: %s", strings.TrimSpace(errMsg))
}
log.Warn().Err(err).Str("module", modulePath).Msg("Git validation failed, allowing cache fallback")
return true, fmt.Errorf("validation error (allowing cache): %w", err)
}
return true, nil
}
func (v *GoValidator) validateGit(ctx context.Context, modulePath, credentials string) (bool, error) {
// Generic git validation for other providers
// Similar to GitHub validation but with generic host detection
repoURL := fmt.Sprintf("https://%s.git", modulePath)
cmd := exec.CommandContext(ctx, "git", "ls-remote", repoURL, "HEAD")
cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
output, err := cmd.CombinedOutput()
if err != nil {
errMsg := string(output)
if strings.Contains(errMsg, "could not read Username") ||
strings.Contains(errMsg, "Authentication failed") ||
strings.Contains(errMsg, "not found") {
return false, fmt.Errorf("access denied: %s", strings.TrimSpace(errMsg))
}
log.Warn().Err(err).Str("module", modulePath).Msg("Git validation failed, allowing cache fallback")
return true, fmt.Errorf("validation error (allowing cache): %w", err)
}
return true, nil
}
+5 -4
View File
@@ -245,10 +245,11 @@ type HandlersConfig struct {
// GoHandlerConfig contains Go proxy configuration
type GoHandlerConfig struct {
Enabled bool `mapstructure:"enabled" json:"enabled"`
UpstreamProxy string `mapstructure:"upstream_proxy" json:"upstream_proxy"`
ChecksumDB string `mapstructure:"checksum_db" json:"checksum_db"`
VerifyChecksums bool `mapstructure:"verify_checksums" json:"verify_checksums"`
Enabled bool `mapstructure:"enabled" json:"enabled"`
UpstreamProxy string `mapstructure:"upstream_proxy" json:"upstream_proxy"`
ChecksumDB string `mapstructure:"checksum_db" json:"checksum_db"`
VerifyChecksums bool `mapstructure:"verify_checksums" json:"verify_checksums"`
GitCredentialsFile string `mapstructure:"git_credentials_file" json:"git_credentials_file"` // Path to git credentials JSON file
}
// NPMHandlerConfig contains NPM registry configuration
+2
View File
@@ -83,6 +83,8 @@ type Package struct {
DownloadCount int64 `json:"download_count"` // Download counter
Metadata map[string]string `json:"metadata"` // Additional metadata
SecurityScanned bool `json:"security_scanned"` // Has been scanned
RequiresAuth bool `json:"requires_auth"` // Package requires authentication
AuthProvider string `json:"auth_provider"` // Auth provider (github.com, npm.pkg.github.com, etc.)
}
// ScanResult represents a security scan result
+71 -10
View File
@@ -46,6 +46,8 @@ CREATE TABLE IF NOT EXISTS packages (
download_count INTEGER DEFAULT 0,
metadata TEXT,
security_scanned BOOLEAN DEFAULT 0,
requires_auth BOOLEAN DEFAULT 0,
auth_provider TEXT,
UNIQUE(registry, name, version)
);
@@ -149,11 +151,51 @@ func New(cfg Config) (*SQLiteStore, error) {
return nil, errors.Wrap(err, errors.ErrCodeStorageFailure, "failed to create SQLite schema")
}
// Run migrations for existing databases
if err := runMigrations(db); err != nil {
db.Close()
return nil, errors.Wrap(err, errors.ErrCodeStorageFailure, "failed to run database migrations")
}
return &SQLiteStore{
db: db,
}, nil
}
// runMigrations runs database migrations for existing databases
func runMigrations(db *sql.DB) error {
// Migration 1: Add requires_auth and auth_provider columns (if they don't exist)
// SQLite doesn't have IF NOT EXISTS for ALTER TABLE, so we need to check first
var columnExists int
err := db.QueryRow("SELECT COUNT(*) FROM pragma_table_info('packages') WHERE name='requires_auth'").Scan(&columnExists)
if err != nil {
return err
}
if columnExists == 0 {
log.Info().Msg("Running migration: adding requires_auth and auth_provider columns")
// Add requires_auth column
if _, err := db.Exec("ALTER TABLE packages ADD COLUMN requires_auth BOOLEAN DEFAULT 0"); err != nil {
return fmt.Errorf("failed to add requires_auth column: %w", err)
}
// Add auth_provider column
if _, err := db.Exec("ALTER TABLE packages ADD COLUMN auth_provider TEXT"); err != nil {
return fmt.Errorf("failed to add auth_provider column: %w", err)
}
// Create index
if _, err := db.Exec("CREATE INDEX IF NOT EXISTS idx_packages_requires_auth ON packages(requires_auth)"); err != nil {
return fmt.Errorf("failed to create requires_auth index: %w", err)
}
log.Info().Msg("Migration completed successfully")
}
return nil
}
// SavePackage saves package metadata
func (s *SQLiteStore) SavePackage(ctx context.Context, pkg *metadata.Package) error {
s.mu.Lock()
@@ -175,8 +217,8 @@ func (s *SQLiteStore) SavePackage(ctx context.Context, pkg *metadata.Package) er
id, registry, name, version, storage_key, size,
checksum_md5, checksum_sha256, upstream_url,
cached_at, last_accessed, expires_at, download_count,
metadata, security_scanned
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
metadata, security_scanned, requires_auth, auth_provider
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(registry, name, version) DO UPDATE SET
storage_key = excluded.storage_key,
size = excluded.size,
@@ -186,14 +228,16 @@ func (s *SQLiteStore) SavePackage(ctx context.Context, pkg *metadata.Package) er
last_accessed = excluded.last_accessed,
expires_at = excluded.expires_at,
metadata = excluded.metadata,
security_scanned = excluded.security_scanned
security_scanned = excluded.security_scanned,
requires_auth = excluded.requires_auth,
auth_provider = excluded.auth_provider
`
_, err = s.db.ExecContext(ctx, query,
pkg.ID, pkg.Registry, pkg.Name, pkg.Version, pkg.StorageKey, pkg.Size,
pkg.ChecksumMD5, pkg.ChecksumSHA256, pkg.UpstreamURL,
pkg.CachedAt, pkg.LastAccessed, expiresAt, pkg.DownloadCount,
string(metadataJSON), pkg.SecurityScanned,
string(metadataJSON), pkg.SecurityScanned, pkg.RequiresAuth, pkg.AuthProvider,
)
if err != nil {
@@ -212,7 +256,7 @@ func (s *SQLiteStore) GetPackage(ctx context.Context, registry, name, version st
SELECT id, registry, name, version, storage_key, size,
checksum_md5, checksum_sha256, upstream_url,
cached_at, last_accessed, expires_at, download_count,
metadata, security_scanned
metadata, security_scanned, requires_auth, auth_provider
FROM packages
WHERE registry = ? AND name = ? AND version = ?
`
@@ -220,12 +264,13 @@ func (s *SQLiteStore) GetPackage(ctx context.Context, registry, name, version st
var pkg metadata.Package
var metadataJSON string
var expiresAt sql.NullTime
var authProvider sql.NullString
err := s.db.QueryRowContext(ctx, query, registry, name, version).Scan(
&pkg.ID, &pkg.Registry, &pkg.Name, &pkg.Version, &pkg.StorageKey, &pkg.Size,
&pkg.ChecksumMD5, &pkg.ChecksumSHA256, &pkg.UpstreamURL,
&pkg.CachedAt, &pkg.LastAccessed, &expiresAt, &pkg.DownloadCount,
&metadataJSON, &pkg.SecurityScanned,
&metadataJSON, &pkg.SecurityScanned, &pkg.RequiresAuth, &authProvider,
)
if err == sql.ErrNoRows {
@@ -240,6 +285,10 @@ func (s *SQLiteStore) GetPackage(ctx context.Context, registry, name, version st
pkg.ExpiresAt = &expiresAt.Time
}
if authProvider.Valid {
pkg.AuthProvider = authProvider.String
}
// Deserialize metadata
if metadataJSON != "" {
if err := goccy_json.Unmarshal([]byte(metadataJSON), &pkg.Metadata); err != nil {
@@ -516,6 +565,7 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg
COUNT(*) as download_count
FROM download_events
WHERE downloaded_at >= ?
AND downloaded_at IS NOT NULL
`
args = []interface{}{timeFormat, startTime}
@@ -526,6 +576,7 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg
query += `
GROUP BY time_bucket
HAVING time_bucket IS NOT NULL
ORDER BY time_bucket ASC
`
} else {
@@ -535,7 +586,9 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg
time_bucket,
SUM(download_count) as download_count
FROM aggregated_download_stats
WHERE resolution = ? AND time_bucket >= ?
WHERE resolution = ?
AND time_bucket >= ?
AND time_bucket IS NOT NULL
`
args = []interface{}{useResolution, startTime}
@@ -559,12 +612,15 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg
// Collect data points
dataMap := make(map[string]int64)
for rows.Next() {
var bucket string
var bucket sql.NullString
var count int64
if err := rows.Scan(&bucket, &count); err != nil {
return nil, errors.Wrap(err, errors.ErrCodeStorageFailure, "failed to scan time-series data")
}
dataMap[bucket] = count
// Skip NULL buckets (shouldn't happen with NOT NULL constraint, but defensive)
if bucket.Valid && bucket.String != "" {
dataMap[bucket.String] = count
}
}
if err := rows.Err(); err != nil {
@@ -654,7 +710,9 @@ func (s *SQLiteStore) AggregateDownloadData(ctx context.Context) error {
COUNT(*) as download_count
FROM download_events
WHERE downloaded_at < ?
AND downloaded_at IS NOT NULL
GROUP BY registry, time_bucket
HAVING time_bucket IS NOT NULL
`
_, err = tx.ExecContext(ctx, hourlyAggQuery, oneHourAgo)
if err != nil {
@@ -680,8 +738,11 @@ func (s *SQLiteStore) AggregateDownloadData(ctx context.Context) error {
'daily' as resolution,
SUM(download_count) as download_count
FROM aggregated_download_stats
WHERE resolution = 'hourly' AND time_bucket < ?
WHERE resolution = 'hourly'
AND time_bucket < ?
AND time_bucket IS NOT NULL
GROUP BY registry, strftime('%Y-%m-%d 00:00:00', time_bucket)
HAVING time_bucket IS NOT NULL
`
_, err = tx.ExecContext(ctx, dailyAggQuery, oneDayAgo)
if err != nil {
+206 -20
View File
@@ -6,25 +6,35 @@ import (
"io"
"net/http"
"strings"
"time"
"github.com/lukaszraczylo/gohoarder/pkg/auth"
"github.com/lukaszraczylo/gohoarder/pkg/cache"
"github.com/lukaszraczylo/gohoarder/pkg/errors"
"github.com/lukaszraczylo/gohoarder/pkg/network"
"github.com/lukaszraczylo/gohoarder/pkg/vcs"
"github.com/rs/zerolog/log"
)
// Handler implements the GOPROXY protocol
type Handler struct {
cache *cache.Manager
client *network.Client
upstream string
sumDBURL string
cache *cache.Manager
client *network.Client
upstream string
sumDBURL string
credExtractor *auth.CredentialExtractor
credHasher *auth.CredentialHasher
credValidator *auth.GoValidator
validationCache *auth.ValidationCache
gitFetcher *vcs.GitFetcher
moduleBuilder *vcs.ModuleBuilder
}
// Config holds Go proxy configuration
type Config struct {
Upstream string // Upstream Go proxy (e.g., proxy.golang.org)
SumDBURL string // Checksum database URL
Upstream string // Upstream Go proxy (e.g., proxy.golang.org)
SumDBURL string // Checksum database URL
CredStore *vcs.CredentialStore // Optional credential store for git access
}
// New creates a new Go proxy handler
@@ -37,11 +47,23 @@ func New(cacheManager *cache.Manager, client *network.Client, config Config) *Ha
config.SumDBURL = "https://sum.golang.org"
}
// Use provided credential store or create empty one
credStore := config.CredStore
if credStore == nil {
credStore = vcs.NewCredentialStore()
}
return &Handler{
cache: cacheManager,
client: client,
upstream: config.Upstream,
sumDBURL: config.SumDBURL,
cache: cacheManager,
client: client,
upstream: config.Upstream,
sumDBURL: config.SumDBURL,
credExtractor: auth.NewCredentialExtractor(),
credHasher: auth.NewCredentialHasher(),
credValidator: auth.NewGoValidator(),
validationCache: auth.NewValidationCache(5 * time.Minute),
gitFetcher: vcs.NewGitFetcher("", credStore),
moduleBuilder: vcs.NewModuleBuilder(),
}
}
@@ -88,8 +110,17 @@ func (h *Handler) handleList(ctx context.Context, w http.ResponseWriter, r *http
url := h.upstream + path
modulePath := h.extractModulePath(path)
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
entry, err := h.cache.Get(ctx, "go", modulePath, "list", func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, url, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
body, statusCode, err := h.client.Get(ctx, url, headers)
if err != nil {
return nil, "", err
}
@@ -119,8 +150,17 @@ func (h *Handler) handleInfo(ctx context.Context, w http.ResponseWriter, r *http
// Use .info suffix to distinguish from .mod and .zip in cache
cacheKey := modulePath + "/@v/" + version + ".info"
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
entry, err := h.cache.Get(ctx, "go", cacheKey, version, func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, url, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
body, statusCode, err := h.client.Get(ctx, url, headers)
if err != nil {
return nil, "", err
}
@@ -150,8 +190,17 @@ func (h *Handler) handleMod(ctx context.Context, w http.ResponseWriter, r *http.
// Use .mod suffix to distinguish from .info and .zip in cache
cacheKey := modulePath + "/@v/" + version + ".mod"
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
entry, err := h.cache.Get(ctx, "go", cacheKey, version, func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, url, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
body, statusCode, err := h.client.Get(ctx, url, headers)
if err != nil {
return nil, "", err
}
@@ -181,16 +230,55 @@ func (h *Handler) handleZip(ctx context.Context, w http.ResponseWriter, r *http.
// Use .zip suffix to distinguish from .info and .mod in cache
cacheKey := modulePath + "/@v/" + version + ".zip"
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
credHash := h.credHasher.Hash(credentials)
log.Debug().
Str("path", path).
Str("module", modulePath).
Str("version", version).
Str("url", url).
Str("cred_hash", credHash).
Bool("has_credentials", credentials != "").
Msg("Handling Go module zip request")
entry, err := h.cache.Get(ctx, "go", cacheKey, version, func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, url, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
// Try upstream proxy first (fast path for public modules)
body, statusCode, err := h.client.Get(ctx, url, headers)
if err == nil && statusCode == http.StatusOK {
return body, url, nil
}
// If upstream failed with 404 or 403, try git fallback (private modules)
if statusCode == http.StatusNotFound || statusCode == http.StatusForbidden {
if body != nil {
body.Close()
}
log.Debug().
Str("module", modulePath).
Str("version", version).
Int("upstream_status", statusCode).
Msg("Upstream proxy returned not found, trying git fallback")
return h.fetchModuleFromGit(ctx, modulePath, version, credentials)
}
// Other errors
if body != nil {
body.Close()
}
if err != nil {
return nil, "", err
}
if statusCode != http.StatusOK {
body.Close()
return nil, "", fmt.Errorf("upstream returned status %d", statusCode)
}
return body, url, nil
return nil, "", fmt.Errorf("upstream returned status %d", statusCode)
})
if err != nil {
@@ -208,6 +296,58 @@ func (h *Handler) handleZip(ctx context.Context, w http.ResponseWriter, r *http.
}
defer entry.Data.Close()
// CRITICAL SECURITY CHECK: If module requires auth, validate credentials
if entry.Package != nil && entry.Package.RequiresAuth {
// Check validation cache first
allowed, cached, reason := h.validationCache.Get(credHash, modulePath)
if cached {
if !allowed {
log.Warn().
Str("module", modulePath).
Str("version", version).
Str("reason", reason).
Msg("Access denied (cached validation)")
http.Error(w, "Module not found", http.StatusNotFound)
return
}
log.Debug().
Str("module", modulePath).
Str("version", version).
Msg("Access granted (cached validation)")
} else {
// Validate with upstream using git ls-remote
log.Debug().
Str("module", modulePath).
Str("version", version).
Str("provider", entry.Package.AuthProvider).
Msg("Validating credentials with upstream")
allowed, err := h.credValidator.ValidateAccess(ctx, modulePath, credentials)
if err != nil {
reason = err.Error()
}
// Cache validation result
h.validationCache.Set(credHash, modulePath, allowed, reason)
if !allowed {
log.Warn().
Str("module", modulePath).
Str("version", version).
Err(err).
Msg("Access denied by upstream")
// Return 404 (same as GitHub does for private repos)
http.Error(w, "Module not found", http.StatusNotFound)
return
}
log.Debug().
Str("module", modulePath).
Str("version", version).
Msg("Access granted by upstream")
}
}
w.Header().Set("Content-Type", "application/zip")
io.Copy(w, entry.Data)
}
@@ -217,8 +357,17 @@ func (h *Handler) handleLatest(ctx context.Context, w http.ResponseWriter, r *ht
url := h.upstream + path
modulePath := h.extractModulePath(path)
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
entry, err := h.cache.Get(ctx, "go", modulePath, "latest", func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, url, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
body, statusCode, err := h.client.Get(ctx, url, headers)
if err != nil {
return nil, "", err
}
@@ -297,3 +446,40 @@ func (h *Handler) extractModulePath(path string) string {
// Fallback: remove /@latest suffix if present
return strings.TrimSuffix(path, "/@latest")
}
// fetchModuleFromGit fetches a Go module directly from git repository
func (h *Handler) fetchModuleFromGit(ctx context.Context, modulePath, version, credentials string) (io.ReadCloser, string, error) {
log.Info().
Str("module", modulePath).
Str("version", version).
Msg("Fetching module from git repository")
// 1. Fetch module source from git
srcPath, err := h.gitFetcher.FetchModule(ctx, modulePath, version, credentials)
if err != nil {
return nil, "", fmt.Errorf("git fetch failed: %w", err)
}
defer h.gitFetcher.Cleanup(srcPath)
// 2. Validate module
if err := h.moduleBuilder.ValidateModule(ctx, srcPath, modulePath); err != nil {
return nil, "", fmt.Errorf("module validation failed: %w", err)
}
// 3. Build module zip
zipReader, err := h.moduleBuilder.BuildModuleZip(ctx, srcPath, modulePath, version)
if err != nil {
return nil, "", fmt.Errorf("module zip build failed: %w", err)
}
// Create source URL for logging
sourceURL := fmt.Sprintf("git+https://%s@%s", modulePath, version)
log.Info().
Str("module", modulePath).
Str("version", version).
Str("source", sourceURL).
Msg("Successfully built module from git")
return zipReader, sourceURL, nil
}
+81 -7
View File
@@ -8,7 +8,9 @@ import (
"io"
"net/http"
"strings"
"time"
"github.com/lukaszraczylo/gohoarder/pkg/auth"
"github.com/lukaszraczylo/gohoarder/pkg/cache"
"github.com/lukaszraczylo/gohoarder/pkg/errors"
"github.com/lukaszraczylo/gohoarder/pkg/network"
@@ -17,9 +19,13 @@ import (
// Handler implements the NPM registry protocol
type Handler struct {
cache *cache.Manager
client *network.Client
upstream string
cache *cache.Manager
client *network.Client
upstream string
credExtractor *auth.CredentialExtractor
credHasher *auth.CredentialHasher
credValidator *auth.NPMValidator
validationCache *auth.ValidationCache
}
// Config holds NPM proxy configuration
@@ -34,9 +40,13 @@ func New(cacheManager *cache.Manager, client *network.Client, config Config) *Ha
}
return &Handler{
cache: cacheManager,
client: client,
upstream: config.Upstream,
cache: cacheManager,
client: client,
upstream: config.Upstream,
credExtractor: auth.NewCredentialExtractor(),
credHasher: auth.NewCredentialHasher(),
credValidator: auth.NewNPMValidator(),
validationCache: auth.NewValidationCache(5 * time.Minute),
}
}
@@ -123,6 +133,10 @@ func (h *Handler) handleMetadata(ctx context.Context, w http.ResponseWriter, r *
func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) {
packageName, version := extractTarballInfo(path)
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
credHash := h.credHasher.Hash(credentials)
// Construct proper upstream URL with /-/ format
// Format: https://registry.npmjs.org/package/-/package-version.tgz
tarballFilename := strings.ReplaceAll(packageName, "/", "-") + "-" + version + ".tgz"
@@ -133,10 +147,19 @@ func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *h
Str("package", packageName).
Str("version", version).
Str("upstream_url", url).
Str("cred_hash", credHash).
Bool("has_credentials", credentials != "").
Msg("Handling tarball request")
// Try to get from cache first (with credential-aware key)
entry, err := h.cache.Get(ctx, "npm", packageName, version, func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, url, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
body, statusCode, err := h.client.Get(ctx, url, headers)
if err != nil {
return nil, "", err
}
@@ -162,6 +185,57 @@ func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *h
}
defer entry.Data.Close()
// CRITICAL SECURITY CHECK: If package requires auth, validate credentials
if entry.Package != nil && entry.Package.RequiresAuth {
// Check validation cache first
allowed, cached, reason := h.validationCache.Get(credHash, url)
if cached {
if !allowed {
log.Warn().
Str("package", packageName).
Str("version", version).
Str("reason", reason).
Msg("Access denied (cached validation)")
http.Error(w, "Access denied", http.StatusForbidden)
return
}
log.Debug().
Str("package", packageName).
Str("version", version).
Msg("Access granted (cached validation)")
} else {
// Validate with upstream
log.Debug().
Str("package", packageName).
Str("version", version).
Str("provider", entry.Package.AuthProvider).
Msg("Validating credentials with upstream")
allowed, err := h.credValidator.ValidateAccess(ctx, url, credentials)
if err != nil {
reason = err.Error()
}
// Cache validation result
h.validationCache.Set(credHash, url, allowed, reason)
if !allowed {
log.Warn().
Str("package", packageName).
Str("version", version).
Err(err).
Msg("Access denied by upstream")
http.Error(w, "Access denied", http.StatusForbidden)
return
}
log.Debug().
Str("package", packageName).
Str("version", version).
Msg("Access granted by upstream")
}
}
w.Header().Set("Content-Type", "application/octet-stream")
io.Copy(w, entry.Data)
}
+87 -7
View File
@@ -8,7 +8,9 @@ import (
"net/http"
"regexp"
"strings"
"time"
"github.com/lukaszraczylo/gohoarder/pkg/auth"
"github.com/lukaszraczylo/gohoarder/pkg/cache"
"github.com/lukaszraczylo/gohoarder/pkg/errors"
"github.com/lukaszraczylo/gohoarder/pkg/network"
@@ -17,9 +19,13 @@ import (
// Handler implements the PyPI Simple API (PEP 503)
type Handler struct {
cache *cache.Manager
client *network.Client
upstream string
cache *cache.Manager
client *network.Client
upstream string
credExtractor *auth.CredentialExtractor
credHasher *auth.CredentialHasher
credValidator *auth.PyPIValidator
validationCache *auth.ValidationCache
}
// Config holds PyPI proxy configuration
@@ -34,9 +40,13 @@ func New(cacheManager *cache.Manager, client *network.Client, config Config) *Ha
}
return &Handler{
cache: cacheManager,
client: client,
upstream: config.Upstream,
cache: cacheManager,
client: client,
upstream: config.Upstream,
credExtractor: auth.NewCredentialExtractor(),
credHasher: auth.NewCredentialHasher(),
credValidator: auth.NewPyPIValidator(),
validationCache: auth.NewValidationCache(5 * time.Minute),
}
}
@@ -138,6 +148,10 @@ func (h *Handler) handlePackagePage(ctx context.Context, w http.ResponseWriter,
func (h *Handler) handlePackageFile(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) {
packageName, version := extractPackageFileInfo(path)
// Extract credentials from request
credentials := h.credExtractor.Extract(r)
credHash := h.credHasher.Hash(credentials)
// Check if we have the original URL from the rewritten package page
originalURL := r.URL.Query().Get("original_url")
@@ -152,8 +166,23 @@ func (h *Handler) handlePackageFile(ctx context.Context, w http.ResponseWriter,
}
}
log.Debug().
Str("path", path).
Str("package", packageName).
Str("version", version).
Str("url", originalURL).
Str("cred_hash", credHash).
Bool("has_credentials", credentials != "").
Msg("Handling PyPI package file request")
entry, err := h.cache.Get(ctx, "pypi", packageName, version, func(ctx context.Context) (io.ReadCloser, string, error) {
body, statusCode, err := h.client.Get(ctx, originalURL, nil)
// Prepare headers for upstream request
headers := make(map[string]string)
if credentials != "" {
headers["Authorization"] = credentials
}
body, statusCode, err := h.client.Get(ctx, originalURL, headers)
if err != nil {
return nil, "", err
}
@@ -179,6 +208,57 @@ func (h *Handler) handlePackageFile(ctx context.Context, w http.ResponseWriter,
}
defer entry.Data.Close()
// CRITICAL SECURITY CHECK: If package requires auth, validate credentials
if entry.Package != nil && entry.Package.RequiresAuth {
// Check validation cache first
allowed, cached, reason := h.validationCache.Get(credHash, originalURL)
if cached {
if !allowed {
log.Warn().
Str("package", packageName).
Str("version", version).
Str("reason", reason).
Msg("Access denied (cached validation)")
http.Error(w, "Access denied", http.StatusForbidden)
return
}
log.Debug().
Str("package", packageName).
Str("version", version).
Msg("Access granted (cached validation)")
} else {
// Validate with upstream
log.Debug().
Str("package", packageName).
Str("version", version).
Str("provider", entry.Package.AuthProvider).
Msg("Validating credentials with upstream")
allowed, err := h.credValidator.ValidateAccess(ctx, originalURL, credentials)
if err != nil {
reason = err.Error()
}
// Cache validation result
h.validationCache.Set(credHash, originalURL, allowed, reason)
if !allowed {
log.Warn().
Str("package", packageName).
Str("version", version).
Err(err).
Msg("Access denied by upstream")
http.Error(w, "Access denied", http.StatusForbidden)
return
}
log.Debug().
Str("package", packageName).
Str("version", version).
Msg("Access granted by upstream")
}
}
// Determine content type based on file extension
contentType := "application/octet-stream"
if strings.HasSuffix(path, ".whl") {
+247
View File
@@ -0,0 +1,247 @@
package vcs
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/rs/zerolog/log"
)
// CredentialStore manages git credentials for different repository patterns
type CredentialStore struct {
credentials []CredentialEntry
}
// CredentialEntry represents credentials for a specific pattern
type CredentialEntry struct {
Pattern string `json:"pattern"` // Glob pattern: "github.com/myorg/*"
Host string `json:"host"` // Git host: "github.com"
Username string `json:"username"` // Usually "oauth2" for tokens
Token string `json:"token"` // Access token
Fallback bool `json:"fallback"` // Use as fallback if no match
}
// CredentialConfig represents the JSON configuration format
type CredentialConfig struct {
Credentials []CredentialEntry `json:"credentials"`
}
// NewCredentialStore creates a new credential store
func NewCredentialStore() *CredentialStore {
return &CredentialStore{
credentials: make([]CredentialEntry, 0),
}
}
// LoadFromFile loads credentials from a JSON file
func (cs *CredentialStore) LoadFromFile(path string) error {
if path == "" {
log.Debug().Msg("No credential file specified, using system git config")
return nil
}
// Check if file exists
if _, err := os.Stat(path); os.IsNotExist(err) {
log.Warn().Str("path", path).Msg("Credential file not found, using system git config")
return nil
}
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read credential file: %w", err)
}
var config CredentialConfig
if err := json.Unmarshal(data, &config); err != nil {
return fmt.Errorf("failed to parse credential file: %w", err)
}
cs.credentials = config.Credentials
log.Info().
Str("file", path).
Int("credentials", len(cs.credentials)).
Msg("Loaded git credentials from file")
// Log patterns (not tokens!) for debugging
for i, cred := range cs.credentials {
log.Debug().
Int("index", i).
Str("pattern", cred.Pattern).
Str("host", cred.Host).
Bool("fallback", cred.Fallback).
Msg("Registered credential pattern")
}
return nil
}
// GetCredentialsForModule finds the best matching credentials for a module path
// Returns (username, token, found)
func (cs *CredentialStore) GetCredentialsForModule(modulePath string) (string, string, bool) {
if len(cs.credentials) == 0 {
// No credentials configured, rely on system git config
return "", "", false
}
// Find best match
var bestMatch *CredentialEntry
var fallbackMatch *CredentialEntry
bestMatchLen := 0
for i := range cs.credentials {
cred := &cs.credentials[i]
// Check for fallback
if cred.Fallback {
fallbackMatch = cred
continue
}
// Check if pattern matches
if cs.matchPattern(cred.Pattern, modulePath) {
// Use longest matching pattern (most specific)
if len(cred.Pattern) > bestMatchLen {
bestMatch = cred
bestMatchLen = len(cred.Pattern)
}
}
}
// Use best match if found
if bestMatch != nil {
log.Debug().
Str("module", modulePath).
Str("pattern", bestMatch.Pattern).
Str("host", bestMatch.Host).
Msg("Matched credential pattern")
return bestMatch.Username, bestMatch.Token, true
}
// Use fallback if available
if fallbackMatch != nil {
log.Debug().
Str("module", modulePath).
Str("pattern", fallbackMatch.Pattern).
Msg("Using fallback credentials")
return fallbackMatch.Username, fallbackMatch.Token, true
}
// No match found
log.Debug().
Str("module", modulePath).
Msg("No credential pattern matched, using system git config")
return "", "", false
}
// matchPattern checks if a module path matches a credential pattern
// Supports glob-style patterns:
// - github.com/myorg/* matches github.com/myorg/repo1, github.com/myorg/repo2
// - github.com/myorg/repo matches exactly github.com/myorg/repo
// - * matches everything
func (cs *CredentialStore) matchPattern(pattern, modulePath string) bool {
// Exact match
if pattern == modulePath {
return true
}
// Wildcard match all
if pattern == "*" {
return true
}
// Glob-style matching
matched, err := filepath.Match(pattern, modulePath)
if err != nil {
log.Warn().Err(err).Str("pattern", pattern).Msg("Invalid pattern")
return false
}
if matched {
return true
}
// Prefix matching with /*
if strings.HasSuffix(pattern, "/*") {
prefix := strings.TrimSuffix(pattern, "/*")
return strings.HasPrefix(modulePath, prefix+"/")
}
return false
}
// CreateNetrcContent creates .netrc file content for a specific host
func (cs *CredentialStore) CreateNetrcContent(host, username, token string) string {
return fmt.Sprintf("machine %s\nlogin %s\npassword %s\n", host, username, token)
}
// GetCredentialsForHost finds credentials for a specific git host (e.g., "github.com")
// This is useful when you need credentials for a host but don't have a full module path
func (cs *CredentialStore) GetCredentialsForHost(host string) (string, string, bool) {
if len(cs.credentials) == 0 {
return "", "", false
}
// Look for exact host match first
for i := range cs.credentials {
cred := &cs.credentials[i]
if cred.Host == host && !cred.Fallback {
log.Debug().
Str("host", host).
Str("pattern", cred.Pattern).
Msg("Found credentials for host")
return cred.Username, cred.Token, true
}
}
// Try fallback
for i := range cs.credentials {
cred := &cs.credentials[i]
if cred.Fallback {
log.Debug().
Str("host", host).
Msg("Using fallback credentials for host")
return cred.Username, cred.Token, true
}
}
return "", "", false
}
// ValidateConfig validates the credential configuration
func (cs *CredentialStore) ValidateConfig() error {
hostPatterns := make(map[string]bool)
for i, cred := range cs.credentials {
// Check required fields
if cred.Pattern == "" {
return fmt.Errorf("credential entry %d: pattern is required", i)
}
if cred.Host == "" && cred.Pattern != "*" {
return fmt.Errorf("credential entry %d: host is required (pattern: %s)", i, cred.Pattern)
}
if cred.Token == "" {
return fmt.Errorf("credential entry %d: token is required (pattern: %s)", i, cred.Pattern)
}
// Set default username if not provided
if cred.Username == "" {
cs.credentials[i].Username = "oauth2"
}
// Check for duplicate patterns
key := cred.Pattern + ":" + cred.Host
if hostPatterns[key] && !cred.Fallback {
log.Warn().
Str("pattern", cred.Pattern).
Str("host", cred.Host).
Msg("Duplicate credential pattern, last one wins")
}
hostPatterns[key] = true
}
return nil
}
+283
View File
@@ -0,0 +1,283 @@
package vcs
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/rs/zerolog/log"
)
// GitFetcher handles git repository operations
type GitFetcher struct {
workDir string
timeout time.Duration
credStore *CredentialStore
}
// NewGitFetcher creates a new git fetcher
func NewGitFetcher(workDir string, credStore *CredentialStore) *GitFetcher {
if workDir == "" {
workDir = os.TempDir()
}
if credStore == nil {
credStore = NewCredentialStore()
}
return &GitFetcher{
workDir: workDir,
timeout: 30 * time.Second,
credStore: credStore,
}
}
// FetchModule clones a git repository and checks out a specific version
// Returns the path to the checked-out source directory
func (g *GitFetcher) FetchModule(ctx context.Context, modulePath, version, credentials string) (string, error) {
// Create context with timeout
ctx, cancel := context.WithTimeout(ctx, g.timeout)
defer cancel()
// Parse module path to extract repository URL
repoURL, err := g.modulePathToRepoURL(modulePath)
if err != nil {
return "", err
}
// Create temporary directory for this clone
cloneDir, err := os.MkdirTemp(g.workDir, "gohoarder-git-*")
if err != nil {
return "", fmt.Errorf("failed to create temp directory: %w", err)
}
log.Debug().
Str("module", modulePath).
Str("version", version).
Str("repo_url", repoURL).
Str("clone_dir", cloneDir).
Msg("Fetching module from git")
// Set up credentials
credentialHelper, cleanup, err := g.setupCredentials(repoURL, modulePath, credentials)
if err != nil {
os.RemoveAll(cloneDir)
return "", fmt.Errorf("failed to setup credentials: %w", err)
}
defer cleanup()
// Try shallow clone with specific version first (fastest)
if err := g.shallowClone(ctx, repoURL, version, cloneDir, credentialHelper); err != nil {
log.Debug().Err(err).Msg("Shallow clone failed, trying full clone")
// Fallback to full clone
if err := g.fullClone(ctx, repoURL, cloneDir, credentialHelper); err != nil {
os.RemoveAll(cloneDir)
return "", fmt.Errorf("git clone failed: %w", err)
}
// Checkout specific version
if err := g.checkout(ctx, cloneDir, version); err != nil {
os.RemoveAll(cloneDir)
return "", fmt.Errorf("git checkout failed: %w", err)
}
}
log.Debug().
Str("module", modulePath).
Str("version", version).
Str("path", cloneDir).
Msg("Successfully fetched module from git")
return cloneDir, nil
}
// modulePathToRepoURL converts a Go module path to a git repository URL
// Examples:
// github.com/user/repo → https://github.com/user/repo.git
// gitlab.com/group/project → https://gitlab.com/group/project.git
func (g *GitFetcher) modulePathToRepoURL(modulePath string) (string, error) {
// Remove any path components after the repository
// e.g., github.com/user/repo/v2 → github.com/user/repo
parts := strings.Split(modulePath, "/")
if len(parts) < 3 {
return "", fmt.Errorf("invalid module path: %s", modulePath)
}
// For github.com, gitlab.com, bitbucket.org, etc.
// Format: host/owner/repo
host := parts[0]
owner := parts[1]
repo := parts[2]
// Remove version suffix if present (e.g., /v2, /v3)
repo = strings.TrimPrefix(repo, "v")
if strings.HasPrefix(repo, "2") || strings.HasPrefix(repo, "3") {
// This might be a version suffix, but we need to be careful
// For now, keep it as-is
}
repoURL := fmt.Sprintf("https://%s/%s/%s.git", host, owner, repo)
return repoURL, nil
}
// setupCredentials configures git credentials for authentication
// Returns credential helper configuration and cleanup function
func (g *GitFetcher) setupCredentials(repoURL, modulePath, credentials string) (map[string]string, func(), error) {
env := make(map[string]string)
cleanup := func() {}
// Priority 1: Check credential store for pattern-based credentials
if g.credStore != nil {
username, token, found := g.credStore.GetCredentialsForModule(modulePath)
if found {
log.Debug().
Str("module", modulePath).
Msg("Using credentials from credential store")
return g.createTempNetrc(repoURL, username, token)
}
}
// Priority 2: Use credentials from HTTP Authorization header (if provided)
if credentials != "" {
log.Debug().Msg("Using credentials from Authorization header")
return g.createTempNetrcFromHeader(repoURL, credentials)
}
// Priority 3: Rely on system git config (.netrc, etc.)
log.Debug().Msg("No credentials provided, using system git config")
return env, cleanup, nil
}
// createTempNetrc creates a temporary .netrc file with the provided credentials
func (g *GitFetcher) createTempNetrc(repoURL, username, token string) (map[string]string, func(), error) {
// Create temporary .netrc file
tempDir, err := os.MkdirTemp("", "gohoarder-netrc-*")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp netrc directory: %w", err)
}
// Extract host from repo URL
host := g.extractHost(repoURL)
// Create .netrc file
netrcPath := filepath.Join(tempDir, ".netrc")
netrcContent := fmt.Sprintf("machine %s\nlogin %s\npassword %s\n", host, username, token)
if err := os.WriteFile(netrcPath, []byte(netrcContent), 0600); err != nil {
os.RemoveAll(tempDir)
return nil, nil, fmt.Errorf("failed to write .netrc: %w", err)
}
env := map[string]string{
"HOME": tempDir,
"GIT_TERMINAL_PROMPT": "0",
}
cleanup := func() {
os.RemoveAll(tempDir)
}
log.Debug().Str("host", host).Msg("Created temporary .netrc for git authentication")
return env, cleanup, nil
}
// createTempNetrcFromHeader creates a temporary .netrc from Authorization header credentials
func (g *GitFetcher) createTempNetrcFromHeader(repoURL, credentials string) (map[string]string, func(), error) {
// Extract token from credentials
token := strings.TrimPrefix(credentials, "Bearer ")
token = strings.TrimPrefix(token, "Token ")
token = strings.TrimPrefix(token, "Private-Token ")
if token == "" || token == credentials {
// Not in expected format, rely on system config
log.Debug().Msg("Credentials not in Bearer/Token format, using system git config")
return make(map[string]string), func() {}, nil
}
// Use oauth2 as default username for token-based auth
return g.createTempNetrc(repoURL, "oauth2", token)
}
// extractHost extracts the git host from a repository URL
func (g *GitFetcher) extractHost(repoURL string) string {
if strings.Contains(repoURL, "github.com") {
return "github.com"
}
if strings.Contains(repoURL, "gitlab.com") {
return "gitlab.com"
}
if strings.Contains(repoURL, "bitbucket.org") {
return "bitbucket.org"
}
// Generic extraction
parts := strings.Split(repoURL, "/")
if len(parts) >= 3 {
return strings.TrimPrefix(parts[2], "//")
}
return ""
}
// shallowClone performs a shallow clone of a specific version
func (g *GitFetcher) shallowClone(ctx context.Context, repoURL, version, cloneDir string, credentialHelper map[string]string) error {
cmd := exec.CommandContext(ctx, "git", "clone", "--depth", "1", "--branch", version, repoURL, cloneDir)
cmd.Env = append(os.Environ(), g.envMapToSlice(credentialHelper)...)
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("shallow clone failed: %w (output: %s)", err, string(output))
}
return nil
}
// fullClone performs a full clone of the repository
func (g *GitFetcher) fullClone(ctx context.Context, repoURL, cloneDir string, credentialHelper map[string]string) error {
cmd := exec.CommandContext(ctx, "git", "clone", repoURL, cloneDir)
cmd.Env = append(os.Environ(), g.envMapToSlice(credentialHelper)...)
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("full clone failed: %w (output: %s)", err, string(output))
}
return nil
}
// checkout checks out a specific version (tag, branch, or commit)
func (g *GitFetcher) checkout(ctx context.Context, repoDir, version string) error {
cmd := exec.CommandContext(ctx, "git", "checkout", version)
cmd.Dir = repoDir
cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("checkout failed: %w (output: %s)", err, string(output))
}
return nil
}
// envMapToSlice converts environment map to slice
func (g *GitFetcher) envMapToSlice(envMap map[string]string) []string {
var env []string
for k, v := range envMap {
env = append(env, fmt.Sprintf("%s=%s", k, v))
}
return env
}
// Cleanup removes temporary directories
func (g *GitFetcher) Cleanup(paths ...string) {
for _, path := range paths {
if err := os.RemoveAll(path); err != nil {
log.Warn().Err(err).Str("path", path).Msg("Failed to cleanup temporary directory")
}
}
}
+252
View File
@@ -0,0 +1,252 @@
package vcs
import (
"archive/zip"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
"github.com/rs/zerolog/log"
)
// ModuleBuilder builds Go module artifacts from source
type ModuleBuilder struct{}
// NewModuleBuilder creates a new module builder
func NewModuleBuilder() *ModuleBuilder {
return &ModuleBuilder{}
}
// ModuleInfo represents Go module version metadata (.info file)
type ModuleInfo struct {
Version string `json:"Version"`
Time time.Time `json:"Time"`
}
// BuildModuleZip creates a Go module zip from source directory
// Follows the Go module zip format specification: https://go.dev/ref/mod#zip-files
func (b *ModuleBuilder) BuildModuleZip(ctx context.Context, srcPath, modulePath, version string) (io.ReadCloser, error) {
log.Debug().
Str("src_path", srcPath).
Str("module", modulePath).
Str("version", version).
Msg("Building module zip")
// Create in-memory zip
var buf bytes.Buffer
zipWriter := zip.NewWriter(&buf)
// Collect all files to include in zip
files, err := b.collectFiles(srcPath)
if err != nil {
return nil, fmt.Errorf("failed to collect files: %w", err)
}
// Sort files for deterministic zip
sort.Strings(files)
// Add files to zip with proper prefix
prefix := fmt.Sprintf("%s@%s/", modulePath, version)
for _, relPath := range files {
if err := b.addFileToZip(zipWriter, srcPath, relPath, prefix); err != nil {
zipWriter.Close()
return nil, fmt.Errorf("failed to add file %s: %w", relPath, err)
}
}
if err := zipWriter.Close(); err != nil {
return nil, fmt.Errorf("failed to close zip writer: %w", err)
}
log.Debug().
Str("module", modulePath).
Str("version", version).
Int("files", len(files)).
Int("size", buf.Len()).
Msg("Successfully built module zip")
return io.NopCloser(bytes.NewReader(buf.Bytes())), nil
}
// collectFiles walks the source directory and collects files to include
func (b *ModuleBuilder) collectFiles(srcPath string) ([]string, error) {
var files []string
err := filepath.Walk(srcPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// Skip directories
if info.IsDir() {
// Skip .git directory
if info.Name() == ".git" {
return filepath.SkipDir
}
// Skip vendor directory (per Go module zip spec)
if info.Name() == "vendor" {
return filepath.SkipDir
}
return nil
}
// Get relative path
relPath, err := filepath.Rel(srcPath, path)
if err != nil {
return err
}
// Skip hidden files (except .gitignore, etc. if needed)
if strings.HasPrefix(filepath.Base(relPath), ".") && relPath != ".gitignore" {
return nil
}
// Include file
files = append(files, relPath)
return nil
})
if err != nil {
return nil, err
}
return files, nil
}
// addFileToZip adds a single file to the zip archive
func (b *ModuleBuilder) addFileToZip(zipWriter *zip.Writer, srcPath, relPath, prefix string) error {
// Create zip header
header := &zip.FileHeader{
Name: prefix + filepath.ToSlash(relPath),
Method: zip.Deflate,
}
// Get file info for permissions
fullPath := filepath.Join(srcPath, relPath)
info, err := os.Stat(fullPath)
if err != nil {
return err
}
// Set modification time to a fixed value for deterministic zips
// Go uses the timestamp from the version info
header.Modified = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
header.SetMode(info.Mode())
// Create file in zip
writer, err := zipWriter.CreateHeader(header)
if err != nil {
return err
}
// Copy file contents
file, err := os.Open(fullPath)
if err != nil {
return err
}
defer file.Close()
if _, err := io.Copy(writer, file); err != nil {
return err
}
return nil
}
// GenerateModInfo creates .info file (JSON metadata)
func (b *ModuleBuilder) GenerateModInfo(ctx context.Context, srcPath, version string) ([]byte, error) {
// Get commit timestamp from git
timestamp, err := b.getGitCommitTime(srcPath)
if err != nil {
// Fallback to current time if git info not available
log.Warn().Err(err).Msg("Failed to get git commit time, using current time")
timestamp = time.Now()
}
info := ModuleInfo{
Version: version,
Time: timestamp,
}
data, err := json.Marshal(info)
if err != nil {
return nil, fmt.Errorf("failed to marshal module info: %w", err)
}
return data, nil
}
// getGitCommitTime retrieves the commit timestamp from git
func (b *ModuleBuilder) getGitCommitTime(repoPath string) (time.Time, error) {
cmd := exec.Command("git", "log", "-1", "--format=%cI")
cmd.Dir = repoPath
output, err := cmd.Output()
if err != nil {
return time.Time{}, err
}
// Parse ISO 8601 timestamp
timestamp, err := time.Parse(time.RFC3339, strings.TrimSpace(string(output)))
if err != nil {
return time.Time{}, err
}
return timestamp, nil
}
// ExtractGoMod extracts go.mod content
func (b *ModuleBuilder) ExtractGoMod(ctx context.Context, srcPath string) ([]byte, error) {
goModPath := filepath.Join(srcPath, "go.mod")
data, err := os.ReadFile(goModPath)
if err != nil {
return nil, fmt.Errorf("failed to read go.mod: %w", err)
}
// Validate go.mod (basic check)
if !strings.Contains(string(data), "module ") {
return nil, fmt.Errorf("invalid go.mod: missing module directive")
}
return data, nil
}
// ValidateModule performs basic validation on the module
func (b *ModuleBuilder) ValidateModule(ctx context.Context, srcPath, expectedModulePath string) error {
// Read go.mod
goModData, err := b.ExtractGoMod(ctx, srcPath)
if err != nil {
return err
}
// Extract module path from go.mod
lines := strings.Split(string(goModData), "\n")
var declaredModulePath string
for _, line := range lines {
line = strings.TrimSpace(line)
if strings.HasPrefix(line, "module ") {
declaredModulePath = strings.TrimSpace(strings.TrimPrefix(line, "module "))
break
}
}
if declaredModulePath == "" {
return fmt.Errorf("go.mod missing module declaration")
}
// Check if module path matches (allow version suffixes)
if !strings.HasPrefix(expectedModulePath, declaredModulePath) {
return fmt.Errorf("module path mismatch: expected %s, got %s", expectedModulePath, declaredModulePath)
}
return nil
}