mirror of
https://github.com/lukaszraczylo/gohoarder.git
synced 2026-06-05 22:53:53 +00:00
6b037a92b4
- [x] Reorder struct fields across codebase for consistency - [x] Add analytics event handlers and tests - [x] Add authentication API key management handlers and tests - [x] Add pre-warming control handlers and tests - [x] Implement S3 storage backend with tests - [x] Implement SMB/CIFS storage backend with tests - [x] Add CDN middleware tests - [x] Integrate analytics tracking into cache manager - [x] Add S3 and SMB storage initialization in app setup - [x] Add CDN caching to proxy handlers - [x] Remove distributed locking (Redis lock manager) - [x] Remove proxy common package and utilities - [x] Remove standalone HTTP server package - [x] Remove logger middleware - [x] Simplify error handling utilities - [x] Update config with S3 and SMB options - [x] Update cache manager signature to include analytics
220 lines
5.7 KiB
Go
220 lines
5.7 KiB
Go
package scanner
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/lukaszraczylo/gohoarder/pkg/metadata"
|
|
"github.com/lukaszraczylo/gohoarder/pkg/storage"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// RescanWorker handles periodic re-scanning of cached packages
|
|
type RescanWorker struct {
|
|
metadataStore metadata.MetadataStore
|
|
storage storage.StorageBackend
|
|
manager *Manager
|
|
stopCh chan struct{}
|
|
interval time.Duration
|
|
}
|
|
|
|
// NewRescanWorker creates a new rescan worker
|
|
func NewRescanWorker(manager *Manager, metadataStore metadata.MetadataStore, storageBackend storage.StorageBackend, interval time.Duration) *RescanWorker {
|
|
return &RescanWorker{
|
|
manager: manager,
|
|
metadataStore: metadataStore,
|
|
storage: storageBackend,
|
|
interval: interval,
|
|
stopCh: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Start begins the periodic re-scanning process
|
|
func (w *RescanWorker) Start(ctx context.Context) {
|
|
if !w.manager.enabled || w.interval == 0 {
|
|
log.Info().Msg("Rescan worker disabled")
|
|
return
|
|
}
|
|
|
|
log.Info().
|
|
Dur("interval", w.interval).
|
|
Msg("Starting package rescan worker")
|
|
|
|
ticker := time.NewTicker(w.interval)
|
|
defer ticker.Stop()
|
|
|
|
// Run initial scan immediately on startup
|
|
log.Info().Msg("Running initial package scan on startup")
|
|
w.rescanPackages(ctx)
|
|
log.Info().
|
|
Dur("next_scan", w.interval).
|
|
Msg("Initial scan complete, next scan scheduled")
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
w.rescanPackages(ctx)
|
|
case <-w.stopCh:
|
|
log.Info().Msg("Rescan worker stopped")
|
|
return
|
|
case <-ctx.Done():
|
|
log.Info().Msg("Rescan worker stopped (context cancelled)")
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop stops the rescan worker
|
|
func (w *RescanWorker) Stop() {
|
|
close(w.stopCh)
|
|
}
|
|
|
|
// rescanPackages re-scans packages that need updating
|
|
func (w *RescanWorker) rescanPackages(ctx context.Context) {
|
|
log.Info().Msg("Starting package rescan cycle - checking all packages for scan status")
|
|
|
|
// Get all packages
|
|
packages, err := w.metadataStore.ListPackages(ctx, &metadata.ListOptions{})
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to list packages for rescan")
|
|
return
|
|
}
|
|
|
|
scanned := 0
|
|
skipped := 0
|
|
failed := 0
|
|
|
|
for _, pkg := range packages {
|
|
// Skip metadata entries (npm metadata pages, pypi pages, etc.)
|
|
if pkg.Version == "list" || pkg.Version == "latest" || pkg.Version == "metadata" || pkg.Version == "page" {
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
// Check if package needs rescanning
|
|
needsRescan, err := w.needsRescan(ctx, pkg)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("Failed to check rescan status")
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
if !needsRescan {
|
|
log.Debug().
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Bool("security_scanned", pkg.SecurityScanned).
|
|
Msg("Package does not need rescanning, skipping")
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
log.Info().
|
|
Str("registry", pkg.Registry).
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("Package needs rescanning")
|
|
|
|
// Get file path from storage using the storage key from the package metadata
|
|
if pkg.StorageKey == "" {
|
|
log.Warn().
|
|
Str("registry", pkg.Registry).
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("Package has no storage key, skipping rescan")
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
filePath, err := w.getPackageFilePath(ctx, pkg.StorageKey)
|
|
if err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("registry", pkg.Registry).
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Str("storage_key", pkg.StorageKey).
|
|
Msg("Failed to get package file path, skipping rescan")
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
if filePath == "" {
|
|
log.Debug().
|
|
Str("registry", pkg.Registry).
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("No local file path available, skipping rescan")
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
// Perform the actual scan
|
|
if err := w.manager.ScanPackage(ctx, pkg.Registry, pkg.Name, pkg.Version, filePath); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("registry", pkg.Registry).
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("Failed to rescan package")
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
scanned++
|
|
}
|
|
|
|
log.Info().
|
|
Int("total", len(packages)).
|
|
Int("scanned", scanned).
|
|
Int("skipped", skipped).
|
|
Int("failed", failed).
|
|
Msg("Rescan cycle completed")
|
|
}
|
|
|
|
// needsRescan checks if a package needs to be rescanned
|
|
func (w *RescanWorker) needsRescan(ctx context.Context, pkg *metadata.Package) (bool, error) {
|
|
// Get latest scan result
|
|
scanResult, err := w.metadataStore.GetScanResult(ctx, pkg.Registry, pkg.Name, pkg.Version)
|
|
if err != nil {
|
|
// No scan result - needs scanning
|
|
log.Debug().
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("Package has no scan result, needs scanning")
|
|
return true, nil
|
|
}
|
|
|
|
// If package is not marked as scanned but has scan result, it's a stale state - rescan
|
|
if !pkg.SecurityScanned {
|
|
log.Info().
|
|
Str("package", pkg.Name).
|
|
Str("version", pkg.Version).
|
|
Msg("Package has scan result but security_scanned flag is false, needs update")
|
|
return true, nil
|
|
}
|
|
|
|
// Check if scan is older than rescan interval
|
|
timeSinceLastScan := time.Since(scanResult.ScannedAt)
|
|
if timeSinceLastScan >= w.interval {
|
|
return true, nil
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
// getPackageFilePath retrieves the local file path for a package from storage
|
|
func (w *RescanWorker) getPackageFilePath(ctx context.Context, storageKey string) (string, error) {
|
|
// Check if storage backend supports local paths
|
|
if localProvider, ok := w.storage.(storage.LocalPathProvider); ok {
|
|
return localProvider.GetLocalPath(ctx, storageKey)
|
|
}
|
|
|
|
// If storage doesn't support local paths (S3, SMB), we can't rescan
|
|
return "", nil
|
|
}
|