Files
gohoarder/pkg/scanner/rescanner.go
T
lukaszraczylo 6b037a92b4 refactor: reorganize struct fields, add new handlers and storage backends
- [x] Reorder struct fields across codebase for consistency
- [x] Add analytics event handlers and tests
- [x] Add authentication API key management handlers and tests
- [x] Add pre-warming control handlers and tests
- [x] Implement S3 storage backend with tests
- [x] Implement SMB/CIFS storage backend with tests
- [x] Add CDN middleware tests
- [x] Integrate analytics tracking into cache manager
- [x] Add S3 and SMB storage initialization in app setup
- [x] Add CDN caching to proxy handlers
- [x] Remove distributed locking (Redis lock manager)
- [x] Remove proxy common package and utilities
- [x] Remove standalone HTTP server package
- [x] Remove logger middleware
- [x] Simplify error handling utilities
- [x] Update config with S3 and SMB options
- [x] Update cache manager signature to include analytics
2026-01-03 00:18:58 +00:00

220 lines
5.7 KiB
Go

package scanner
import (
"context"
"time"
"github.com/lukaszraczylo/gohoarder/pkg/metadata"
"github.com/lukaszraczylo/gohoarder/pkg/storage"
"github.com/rs/zerolog/log"
)
// RescanWorker handles periodic re-scanning of cached packages
type RescanWorker struct {
metadataStore metadata.MetadataStore
storage storage.StorageBackend
manager *Manager
stopCh chan struct{}
interval time.Duration
}
// NewRescanWorker creates a new rescan worker
func NewRescanWorker(manager *Manager, metadataStore metadata.MetadataStore, storageBackend storage.StorageBackend, interval time.Duration) *RescanWorker {
return &RescanWorker{
manager: manager,
metadataStore: metadataStore,
storage: storageBackend,
interval: interval,
stopCh: make(chan struct{}),
}
}
// Start begins the periodic re-scanning process
func (w *RescanWorker) Start(ctx context.Context) {
if !w.manager.enabled || w.interval == 0 {
log.Info().Msg("Rescan worker disabled")
return
}
log.Info().
Dur("interval", w.interval).
Msg("Starting package rescan worker")
ticker := time.NewTicker(w.interval)
defer ticker.Stop()
// Run initial scan immediately on startup
log.Info().Msg("Running initial package scan on startup")
w.rescanPackages(ctx)
log.Info().
Dur("next_scan", w.interval).
Msg("Initial scan complete, next scan scheduled")
for {
select {
case <-ticker.C:
w.rescanPackages(ctx)
case <-w.stopCh:
log.Info().Msg("Rescan worker stopped")
return
case <-ctx.Done():
log.Info().Msg("Rescan worker stopped (context cancelled)")
return
}
}
}
// Stop stops the rescan worker
func (w *RescanWorker) Stop() {
close(w.stopCh)
}
// rescanPackages re-scans packages that need updating
func (w *RescanWorker) rescanPackages(ctx context.Context) {
log.Info().Msg("Starting package rescan cycle - checking all packages for scan status")
// Get all packages
packages, err := w.metadataStore.ListPackages(ctx, &metadata.ListOptions{})
if err != nil {
log.Error().Err(err).Msg("Failed to list packages for rescan")
return
}
scanned := 0
skipped := 0
failed := 0
for _, pkg := range packages {
// Skip metadata entries (npm metadata pages, pypi pages, etc.)
if pkg.Version == "list" || pkg.Version == "latest" || pkg.Version == "metadata" || pkg.Version == "page" {
skipped++
continue
}
// Check if package needs rescanning
needsRescan, err := w.needsRescan(ctx, pkg)
if err != nil {
log.Error().
Err(err).
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("Failed to check rescan status")
failed++
continue
}
if !needsRescan {
log.Debug().
Str("package", pkg.Name).
Str("version", pkg.Version).
Bool("security_scanned", pkg.SecurityScanned).
Msg("Package does not need rescanning, skipping")
skipped++
continue
}
log.Info().
Str("registry", pkg.Registry).
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("Package needs rescanning")
// Get file path from storage using the storage key from the package metadata
if pkg.StorageKey == "" {
log.Warn().
Str("registry", pkg.Registry).
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("Package has no storage key, skipping rescan")
failed++
continue
}
filePath, err := w.getPackageFilePath(ctx, pkg.StorageKey)
if err != nil {
log.Warn().
Err(err).
Str("registry", pkg.Registry).
Str("package", pkg.Name).
Str("version", pkg.Version).
Str("storage_key", pkg.StorageKey).
Msg("Failed to get package file path, skipping rescan")
failed++
continue
}
if filePath == "" {
log.Debug().
Str("registry", pkg.Registry).
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("No local file path available, skipping rescan")
skipped++
continue
}
// Perform the actual scan
if err := w.manager.ScanPackage(ctx, pkg.Registry, pkg.Name, pkg.Version, filePath); err != nil {
log.Error().
Err(err).
Str("registry", pkg.Registry).
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("Failed to rescan package")
failed++
continue
}
scanned++
}
log.Info().
Int("total", len(packages)).
Int("scanned", scanned).
Int("skipped", skipped).
Int("failed", failed).
Msg("Rescan cycle completed")
}
// needsRescan checks if a package needs to be rescanned
func (w *RescanWorker) needsRescan(ctx context.Context, pkg *metadata.Package) (bool, error) {
// Get latest scan result
scanResult, err := w.metadataStore.GetScanResult(ctx, pkg.Registry, pkg.Name, pkg.Version)
if err != nil {
// No scan result - needs scanning
log.Debug().
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("Package has no scan result, needs scanning")
return true, nil
}
// If package is not marked as scanned but has scan result, it's a stale state - rescan
if !pkg.SecurityScanned {
log.Info().
Str("package", pkg.Name).
Str("version", pkg.Version).
Msg("Package has scan result but security_scanned flag is false, needs update")
return true, nil
}
// Check if scan is older than rescan interval
timeSinceLastScan := time.Since(scanResult.ScannedAt)
if timeSinceLastScan >= w.interval {
return true, nil
}
return false, nil
}
// getPackageFilePath retrieves the local file path for a package from storage
func (w *RescanWorker) getPackageFilePath(ctx context.Context, storageKey string) (string, error) {
// Check if storage backend supports local paths
if localProvider, ok := w.storage.(storage.LocalPathProvider); ok {
return localProvider.GetLocalPath(ctx, storageKey)
}
// If storage doesn't support local paths (S3, SMB), we can't rescan
return "", nil
}