Initial commit

This commit is contained in:
2026-01-02 23:14:23 +00:00
commit 48b834a62a
181 changed files with 33328 additions and 0 deletions
+546
View File
@@ -0,0 +1,546 @@
package file
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/lukaszraczylo/gohoarder/pkg/metadata"
"github.com/rs/zerolog/log"
)
// Store implements a file-based metadata store.
// Records are kept as JSON files beneath basePath. Methods that read or write
// those files take mu first; Health only stats basePath and takes no lock.
type Store struct {
// basePath is the root directory under which all JSON files live.
basePath string
// mu is write-locked by mutating methods and read-locked by lookups/listings.
mu sync.RWMutex
}
// Config holds file store configuration.
type Config struct {
// Path is the root directory of the store; New substitutes "./metadata" when it is empty.
Path string
}
// New creates a new file-based metadata store.
//
// An empty cfg.Path falls back to "./metadata". The directory (including any
// missing parents) is created with mode 0750 before the store is returned;
// a failure to create it is returned wrapped.
func New(cfg Config) (*Store, error) {
	root := cfg.Path
	if root == "" {
		root = "./metadata"
	}

	// Make sure the root exists up front so later writes only add sub-directories.
	mkErr := os.MkdirAll(root, 0750)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create metadata directory: %w", mkErr)
	}

	log.Info().Str("path", root).Msg("File-based metadata store initialized")

	store := &Store{basePath: root}
	return store, nil
}
// SavePackage saves package metadata.
//
// The package is serialized as indented JSON to
// <basePath>/<registry>/<name>-<version>.json, overwriting any previous file.
// A name that contains a path separator (for example an npm scope such as
// "@scope/pkg" or a Go module path) places the file in nested directories, so
// the parent directory of the final file is created, not just the registry
// directory. A nil pkg is rejected with an error instead of panicking.
//
// Errors from the os and json packages are returned unwrapped so callers can
// keep using helpers such as os.IsNotExist on them.
func (s *Store) SavePackage(ctx context.Context, pkg *metadata.Package) error {
	if pkg == nil {
		return fmt.Errorf("cannot save nil package")
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	filename := filepath.Join(s.basePath, pkg.Registry, fmt.Sprintf("%s-%s.json", pkg.Name, pkg.Version))

	// Create the directory that will actually contain the file. For plain
	// names this is the registry directory; for names with separators it is
	// a sub-directory of it.
	if err := os.MkdirAll(filepath.Dir(filename), 0750); err != nil {
		return err
	}

	data, err := json.MarshalIndent(pkg, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(filename, data, 0600)
}
// GetPackage retrieves package metadata.
//
// It reads <basePath>/<registry>/<name>-<version>.json and decodes it. A
// missing file is not an error: the result is (nil, nil). Any other read
// failure, or a JSON decoding failure, is returned unchanged.
func (s *Store) GetPackage(ctx context.Context, registry, name, version string) (*metadata.Package, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	leaf := fmt.Sprintf("%s-%s.json", name, version)
	raw, readErr := os.ReadFile(filepath.Join(s.basePath, registry, leaf)) // #nosec G304 -- Filename is from internal registry structure
	switch {
	case readErr == nil:
		// File was read; decode it below.
	case os.IsNotExist(readErr):
		return nil, nil
	default:
		return nil, readErr
	}

	found := new(metadata.Package)
	if decodeErr := json.Unmarshal(raw, found); decodeErr != nil {
		return nil, decodeErr
	}
	return found, nil
}
// ListPackages lists all packages.
//
// It walks the store root and decodes every ".json" file into a
// metadata.Package, silently skipping files that cannot be read or parsed.
// The "scans" and "bypasses" directories directly under the root hold scan
// results and CVE bypasses rather than packages, so they are not descended
// into (their JSON would otherwise decode into bogus Package values).
//
// When opts is non-nil only opts.Offset and opts.Limit are honoured here; the
// filter and sort fields of ListOptions are not applied by this store.
// A negative Offset is treated as 0 and a Limit of 0 or less means "no limit",
// which matches how ListCVEBypasses interprets Limit. An Offset at or beyond
// the number of packages yields an empty, non-nil slice.
func (s *Store) ListPackages(ctx context.Context, opts *metadata.ListOptions) ([]*metadata.Package, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// filepath.Walk reports children as filepath.Join(root, name), so building
	// the reserved paths with Join makes the comparison below exact.
	scansDir := filepath.Join(s.basePath, "scans")
	bypassesDir := filepath.Join(s.basePath, "bypasses")

	var packages []*metadata.Package
	err := filepath.Walk(s.basePath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			if path == scansDir || path == bypassesDir {
				return filepath.SkipDir
			}
			return nil
		}
		if filepath.Ext(path) != ".json" {
			return nil
		}
		data, err := os.ReadFile(path) // #nosec G304 -- Path from internal file structure
		if err != nil {
			return nil // Skip files we can't read
		}
		var pkg metadata.Package
		if err := json.Unmarshal(data, &pkg); err != nil {
			return nil // Skip invalid JSON
		}
		packages = append(packages, &pkg)
		return nil
	})
	if err != nil {
		return nil, err
	}

	if opts == nil {
		return packages, nil
	}

	// Pagination. Clamp the window instead of slicing with raw caller values,
	// which would panic on a negative offset and return nothing for Limit 0.
	start := opts.Offset
	if start < 0 {
		start = 0
	}
	if start >= len(packages) {
		return []*metadata.Package{}, nil
	}
	end := len(packages)
	if opts.Limit > 0 && opts.Limit < end-start {
		end = start + opts.Limit
	}
	return packages[start:end], nil
}
// DeletePackage deletes package metadata.
//
// Removing a package whose file does not exist is treated as success; any
// other removal error is returned unchanged.
func (s *Store) DeletePackage(ctx context.Context, registry, name, version string) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	target := filepath.Join(s.basePath, registry, fmt.Sprintf("%s-%s.json", name, version))
	rmErr := os.Remove(target)
	if rmErr == nil || os.IsNotExist(rmErr) {
		return nil
	}
	return rmErr
}
// SaveScanResult saves scan result.
//
// Each call writes a file named <version>-<unix seconds>.json beneath
// <basePath>/scans/<registry>/<package name>/, creating that directory when
// needed. Results written in different seconds therefore sit side by side.
func (s *Store) SaveScanResult(ctx context.Context, result *metadata.ScanResult) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	dir := filepath.Join(s.basePath, "scans", result.Registry, result.PackageName)
	if mkErr := os.MkdirAll(dir, 0750); mkErr != nil {
		return mkErr
	}

	// The wall-clock time of the write (not result.ScannedAt) goes into the file name.
	leaf := fmt.Sprintf("%s-%d.json", result.PackageVersion, time.Now().Unix())

	encoded, encErr := json.MarshalIndent(result, "", " ")
	if encErr != nil {
		return encErr
	}
	return os.WriteFile(filepath.Join(dir, leaf), encoded, 0600)
}
// UpdateDownloadCount increments download counter.
//
// The whole read-modify-write cycle runs under a single write lock. It
// deliberately does NOT go through s.GetPackage / s.SavePackage: both acquire
// s.mu themselves, and sync.RWMutex is not reentrant, so calling them while
// the write lock is held would block forever. The metadata file is read and
// rewritten directly instead.
//
// DownloadCount is incremented by one and LastAccessed is set to the current
// time. A package with no metadata file is left alone and nil is returned.
func (s *Store) UpdateDownloadCount(ctx context.Context, registry, name, version string) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	filename := filepath.Join(s.basePath, registry, fmt.Sprintf("%s-%s.json", name, version))

	// Load package
	data, err := os.ReadFile(filename) // #nosec G304 -- Filename is from internal registry structure
	if err != nil {
		if os.IsNotExist(err) {
			return nil // Unknown package: nothing to count
		}
		return err
	}
	var pkg metadata.Package
	if err := json.Unmarshal(data, &pkg); err != nil {
		return err
	}

	// Increment counter
	pkg.DownloadCount++
	pkg.LastAccessed = time.Now()

	// Save back to the same file it was loaded from
	updated, err := json.MarshalIndent(&pkg, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(filename, updated, 0600)
}
// GetStats returns statistics for a registry.
//
// Every package file under the store root is decoded and, when registry is
// non-empty, kept only if its Registry field matches. Files that cannot be
// read or parsed are skipped. The "scans" and "bypasses" directories directly
// under the root are not descended into: their JSON files are scan results
// and CVE bypasses, which would otherwise decode into a metadata.Package
// (scan results even carry a matching "registry" field) and inflate the totals.
//
// TotalPackages, TotalSize, TotalDownloads and ScannedPackages are filled in;
// VulnerablePackages is not computed here and stays 0. LastUpdated is the
// time of the call.
func (s *Store) GetStats(ctx context.Context, registry string) (*metadata.Stats, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	stats := &metadata.Stats{
		Registry:    registry,
		LastUpdated: time.Now(),
	}

	// filepath.Walk reports children as filepath.Join(root, name), so these
	// compare exactly against the paths handed to the callback.
	scansDir := filepath.Join(s.basePath, "scans")
	bypassesDir := filepath.Join(s.basePath, "bypasses")

	// Walk through files and calculate stats
	err := filepath.Walk(s.basePath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			if path == scansDir || path == bypassesDir {
				return filepath.SkipDir
			}
			return nil
		}
		if filepath.Ext(path) != ".json" {
			return nil
		}
		data, err := os.ReadFile(path) // #nosec G304 -- Path from internal file structure
		if err != nil {
			return nil // Skip files we can't read
		}
		var pkg metadata.Package
		if err := json.Unmarshal(data, &pkg); err != nil {
			return nil // Skip invalid JSON
		}
		// Filter by registry if specified
		if registry != "" && pkg.Registry != registry {
			return nil
		}
		stats.TotalPackages++
		stats.TotalSize += pkg.Size
		stats.TotalDownloads += pkg.DownloadCount
		if pkg.SecurityScanned {
			stats.ScannedPackages++
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return stats, nil
}
// GetScanResult retrieves latest scan result.
//
// Scan files live in <basePath>/scans/<registry>/<name>/ and are named
// <version>-<unix seconds>.json. Only files whose name is exactly the
// requested version, a dash, and an all-digit timestamp are considered, so a
// lookup for "1.0.0" can no longer pick up the scan of "1.0.0-beta" (which a
// "<version>-*.json" glob would match). The directory is listed with
// os.ReadDir rather than globbed, so characters such as '[' or '*' in the
// inputs are taken literally.
//
// The file with the numerically largest timestamp wins. When the directory
// does not exist or holds no matching file the result is (nil, nil).
func (s *Store) GetScanResult(ctx context.Context, registry, name, version string) (*metadata.ScanResult, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	scanDir := filepath.Join(s.basePath, "scans", registry, name)
	entries, err := os.ReadDir(scanDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil // Nothing has been scanned for this package yet
		}
		return nil, err
	}

	const suffix = ".json"
	prefix := version + "-"

	latestStamp := ""
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		fileName := entry.Name()
		if len(fileName) <= len(prefix)+len(suffix) {
			continue
		}
		if fileName[:len(prefix)] != prefix || fileName[len(fileName)-len(suffix):] != suffix {
			continue
		}

		// What remains between prefix and suffix must be the decimal timestamp.
		stamp := fileName[len(prefix) : len(fileName)-len(suffix)]
		digitsOnly := true
		for i := 0; i < len(stamp); i++ {
			if stamp[i] < '0' || stamp[i] > '9' {
				digitsOnly = false
				break
			}
		}
		if !digitsOnly {
			continue
		}

		// Timestamps are written with %d (no leading zeros), so a longer
		// digit string is a larger number and equal lengths compare as text.
		if len(stamp) > len(latestStamp) || (len(stamp) == len(latestStamp) && stamp > latestStamp) {
			latestStamp = stamp
		}
	}
	if latestStamp == "" {
		return nil, nil
	}

	latestFile := filepath.Join(scanDir, prefix+latestStamp+suffix)
	data, err := os.ReadFile(latestFile) // #nosec G304 -- Path from directory listing on internal structure
	if err != nil {
		return nil, err
	}
	var result metadata.ScanResult
	if err := json.Unmarshal(data, &result); err != nil {
		return nil, err
	}
	return &result, nil
}
// Count returns total number of packages.
//
// It counts ".json" files under the store root without opening them. The
// "scans" and "bypasses" directories directly under the root are skipped as a
// whole: scan results are stored several levels below "scans"
// (scans/<registry>/<name>/...), so comparing a file's immediate parent with
// "<basePath>/scans" never excluded them, and bypass files are not packages
// either.
func (s *Store) Count(ctx context.Context) (int, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// filepath.Walk reports children as filepath.Join(root, name), so these
	// compare exactly against the paths handed to the callback.
	scansDir := filepath.Join(s.basePath, "scans")
	bypassesDir := filepath.Join(s.basePath, "bypasses")

	count := 0
	err := filepath.Walk(s.basePath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			if path == scansDir || path == bypassesDir {
				return filepath.SkipDir
			}
			return nil
		}
		if filepath.Ext(path) == ".json" {
			count++
		}
		return nil
	})
	if err != nil {
		return 0, err
	}
	return count, nil
}
// Health checks if the store is healthy.
//
// The only check performed is that the root path can be stat'ed; the error
// from os.Stat is returned unchanged, and nil means the path is accessible.
func (s *Store) Health(ctx context.Context) error {
	if _, statErr := os.Stat(s.basePath); statErr != nil {
		return statErr
	}
	return nil
}
// SaveCVEBypass saves a CVE bypass (admin only).
//
// The bypass is written as indented JSON to <basePath>/bypasses/<ID>.json,
// creating the bypasses directory when needed and overwriting any file that
// already exists for the same ID.
func (s *Store) SaveCVEBypass(ctx context.Context, bypass *metadata.CVEBypass) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	dir := filepath.Join(s.basePath, "bypasses")
	if mkErr := os.MkdirAll(dir, 0750); mkErr != nil {
		return mkErr
	}

	target := filepath.Join(dir, fmt.Sprintf("%s.json", bypass.ID))
	encoded, encErr := json.MarshalIndent(bypass, "", " ")
	if encErr != nil {
		return encErr
	}
	return os.WriteFile(target, encoded, 0600)
}
// GetActiveCVEBypasses retrieves all active (non-expired) CVE bypasses.
//
// A bypass is returned when its Active flag is set and its ExpiresAt lies
// after the time of the call. A missing bypasses directory yields no results
// and no error. Files that fail to decode are logged and skipped, while a
// file that cannot be read aborts the listing with that error.
func (s *Store) GetActiveCVEBypasses(ctx context.Context) ([]*metadata.CVEBypass, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var active []*metadata.CVEBypass
	cutoff := time.Now()

	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil && os.IsNotExist(walkErr):
			return nil // bypasses directory doesn't exist yet
		case walkErr != nil:
			return walkErr
		case info.IsDir(), filepath.Ext(path) != ".json":
			return nil
		}

		raw, readErr := os.ReadFile(path) // #nosec G304 -- Path from internal file structure
		if readErr != nil {
			return readErr
		}

		entry := new(metadata.CVEBypass)
		if decodeErr := json.Unmarshal(raw, entry); decodeErr != nil {
			log.Warn().Err(decodeErr).Str("file", path).Msg("Failed to unmarshal bypass")
			return nil
		}

		// Drop anything switched off or already past its expiry.
		if !entry.Active || !entry.ExpiresAt.After(cutoff) {
			return nil
		}
		active = append(active, entry)
		return nil
	}

	if err := filepath.Walk(filepath.Join(s.basePath, "bypasses"), visit); err != nil {
		return nil, err
	}
	return active, nil
}
// ListCVEBypasses lists all CVE bypasses (including expired).
//
// With nil opts every decodable bypass file is returned. With non-nil opts a
// bypass is dropped when opts.Type is set and differs, when it has expired and
// opts.IncludeExpired is false, or when it is inactive and opts.ActiveOnly is
// true. Afterwards Offset and Limit are applied: an Offset at or beyond the
// number of results yields an empty, non-nil slice, and Limit only takes
// effect when it is greater than zero.
//
// A missing bypasses directory yields no results and no error. Files that
// fail to decode are logged and skipped; a file that cannot be read aborts
// the listing with that error.
func (s *Store) ListCVEBypasses(ctx context.Context, opts *metadata.BypassListOptions) ([]*metadata.CVEBypass, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var bypasses []*metadata.CVEBypass
	cutoff := time.Now()

	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil && os.IsNotExist(walkErr):
			return nil // bypasses directory doesn't exist yet
		case walkErr != nil:
			return walkErr
		case info.IsDir(), filepath.Ext(path) != ".json":
			return nil
		}

		raw, readErr := os.ReadFile(path) // #nosec G304 -- Path from internal file structure
		if readErr != nil {
			return readErr
		}

		entry := new(metadata.CVEBypass)
		if decodeErr := json.Unmarshal(raw, entry); decodeErr != nil {
			log.Warn().Err(decodeErr).Str("file", path).Msg("Failed to unmarshal bypass")
			return nil
		}

		// Apply filters if options provided
		if opts != nil {
			wrongType := opts.Type != "" && entry.Type != opts.Type
			hiddenExpired := !opts.IncludeExpired && entry.ExpiresAt.Before(cutoff)
			hiddenInactive := opts.ActiveOnly && !entry.Active
			if wrongType || hiddenExpired || hiddenInactive {
				return nil
			}
		}

		bypasses = append(bypasses, entry)
		return nil
	}

	if err := filepath.Walk(filepath.Join(s.basePath, "bypasses"), visit); err != nil {
		return nil, err
	}

	if opts == nil {
		return bypasses, nil
	}

	// Apply limit and offset if specified
	if opts.Offset >= len(bypasses) {
		return []*metadata.CVEBypass{}, nil
	}
	if opts.Offset > 0 {
		bypasses = bypasses[opts.Offset:]
	}
	if opts.Limit > 0 && opts.Limit < len(bypasses) {
		bypasses = bypasses[:opts.Limit]
	}
	return bypasses, nil
}
// DeleteCVEBypass deletes a CVE bypass by ID.
//
// Unlike DeletePackage, a missing file is reported: the caller gets a
// "CVE bypass not found" error naming the ID. Other removal errors are
// returned unchanged.
func (s *Store) DeleteCVEBypass(ctx context.Context, id string) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	target := filepath.Join(s.basePath, "bypasses", fmt.Sprintf("%s.json", id))
	switch rmErr := os.Remove(target); {
	case rmErr == nil:
		return nil
	case os.IsNotExist(rmErr):
		return fmt.Errorf("CVE bypass not found: %s", id)
	default:
		return rmErr
	}
}
// CleanupExpiredBypasses removes expired bypasses.
//
// Every bypass file whose ExpiresAt lies before the time of the call is
// deleted, regardless of its Active flag. The return value is the number of
// files actually removed; a failed removal is logged and not counted. A
// missing bypasses directory is not an error. Files that fail to decode are
// logged and left in place, while an unreadable file aborts the walk.
func (s *Store) CleanupExpiredBypasses(ctx context.Context) (int, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	removed := 0
	cutoff := time.Now()

	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil && os.IsNotExist(walkErr):
			return nil // bypasses directory doesn't exist yet
		case walkErr != nil:
			return walkErr
		case info.IsDir(), filepath.Ext(path) != ".json":
			return nil
		}

		raw, readErr := os.ReadFile(path) // #nosec G304 -- Path from internal file structure
		if readErr != nil {
			return readErr
		}

		var entry metadata.CVEBypass
		if decodeErr := json.Unmarshal(raw, &entry); decodeErr != nil {
			log.Warn().Err(decodeErr).Str("file", path).Msg("Failed to unmarshal bypass")
			return nil
		}

		// Still valid: keep the file.
		if !entry.ExpiresAt.Before(cutoff) {
			return nil
		}

		if rmErr := os.Remove(path); rmErr != nil {
			log.Warn().Err(rmErr).Str("file", path).Msg("Failed to delete expired bypass")
			return nil
		}
		removed++
		return nil
	}

	if err := filepath.Walk(filepath.Join(s.basePath, "bypasses"), visit); err != nil {
		return 0, err
	}
	return removed, nil
}
// GetTimeSeriesStats returns time-series download statistics.
//
// File-based store doesn't support time-series statistics, so the result
// simply echoes period and registry back with an empty (non-nil) list of
// data points and a nil error.
func (s *Store) GetTimeSeriesStats(ctx context.Context, period string, registry string) (*metadata.TimeSeriesStats, error) {
	empty := new(metadata.TimeSeriesStats)
	empty.Period = period
	empty.Registry = registry
	empty.DataPoints = make([]*metadata.TimeSeriesDataPoint, 0)
	return empty, nil
}
// AggregateDownloadData aggregates download data.
// File-based store doesn't support aggregation, so this does nothing and
// always returns nil. Its signature matches the AggregateDownloadData method
// of metadata.MetadataStore.
func (s *Store) AggregateDownloadData(ctx context.Context) error {
// No-op for file-based store
return nil
}
// Close closes the store.
// The file-based store holds no open handles between calls, so this always
// returns nil.
func (s *Store) Close() error {
// Nothing to close for file-based store
return nil
}
+211
View File
@@ -0,0 +1,211 @@
package metadata
import (
"context"
"strings"
"time"
)
// Store is an alias for MetadataStore for convenience.
// It is declared with "=", so Store and MetadataStore name the same type.
type Store = MetadataStore
// MetadataStore defines the interface for package metadata storage.
// It groups package records, security scan results, CVE bypasses and
// statistics behind one contract. Packages are addressed by the
// (registry, name, version) triple throughout.
type MetadataStore interface {
// SavePackage saves package metadata.
SavePackage(ctx context.Context, pkg *Package) error
// GetPackage retrieves package metadata for the given registry, name and version.
GetPackage(ctx context.Context, registry, name, version string) (*Package, error)
// DeletePackage deletes package metadata for the given registry, name and version.
DeletePackage(ctx context.Context, registry, name, version string) error
// ListPackages lists packages with optional filtering; opts may carry
// filter, sort and pagination settings (see ListOptions).
ListPackages(ctx context.Context, opts *ListOptions) ([]*Package, error)
// UpdateDownloadCount increments the download counter of one package.
UpdateDownloadCount(ctx context.Context, registry, name, version string) error
// GetStats returns statistics for the given registry.
GetStats(ctx context.Context, registry string) (*Stats, error)
// SaveScanResult saves a security scan result.
SaveScanResult(ctx context.Context, result *ScanResult) error
// GetScanResult retrieves a security scan result for one package version.
GetScanResult(ctx context.Context, registry, name, version string) (*ScanResult, error)
// SaveCVEBypass saves a CVE bypass (admin only).
SaveCVEBypass(ctx context.Context, bypass *CVEBypass) error
// GetActiveCVEBypasses retrieves all active (non-expired) CVE bypasses.
GetActiveCVEBypasses(ctx context.Context) ([]*CVEBypass, error)
// ListCVEBypasses lists CVE bypasses, optionally filtered and paginated
// through opts (see BypassListOptions).
ListCVEBypasses(ctx context.Context, opts *BypassListOptions) ([]*CVEBypass, error)
// DeleteCVEBypass deletes a CVE bypass by ID.
DeleteCVEBypass(ctx context.Context, id string) error
// CleanupExpiredBypasses removes expired bypasses and returns an int
// alongside the error (presumably the number removed — confirm per implementation).
CleanupExpiredBypasses(ctx context.Context) (int, error)
// Count returns total number of packages.
Count(ctx context.Context) (int, error)
// Health checks metadata store health; a nil error means healthy.
Health(ctx context.Context) error
// GetTimeSeriesStats returns time-series download statistics for a period and registry.
GetTimeSeriesStats(ctx context.Context, period string, registry string) (*TimeSeriesStats, error)
// AggregateDownloadData aggregates raw download events and cleans up old data.
AggregateDownloadData(ctx context.Context) error
// Close closes the metadata store.
Close() error
}
// Package represents package metadata.
// It is the record stored per cached package version; the struct tags define
// its JSON form, and fields are encoded in declaration order.
type Package struct {
ID string `json:"id"`
Registry string `json:"registry"` // Registry the package belongs to: npm, pypi, go
Name string `json:"name"` // Package name
Version string `json:"version"` // Package version
StorageKey string `json:"storage_key"` // Key in storage backend
Size int64 `json:"size"` // Package size in bytes
ChecksumMD5 string `json:"checksum_md5"` // MD5 checksum
ChecksumSHA256 string `json:"checksum_sha256"` // SHA256 checksum
UpstreamURL string `json:"upstream_url"` // Original upstream URL
CachedAt time.Time `json:"cached_at"` // When cached
LastAccessed time.Time `json:"last_accessed"` // Last access time
ExpiresAt *time.Time `json:"expires_at"` // Expiration time (nil = never)
DownloadCount int64 `json:"download_count"` // Download counter
Metadata map[string]string `json:"metadata"` // Additional free-form key/value metadata
SecurityScanned bool `json:"security_scanned"` // Has been scanned
RequiresAuth bool `json:"requires_auth"` // Package requires authentication
AuthProvider string `json:"auth_provider"` // Auth provider (github.com, npm.pkg.github.com, etc.)
}
// ScanResult represents a security scan result.
// Registry, PackageName and PackageVersion identify the package version that
// was scanned; the remaining fields describe the scan and what it found.
type ScanResult struct {
ID string `json:"id"`
Registry string `json:"registry"`
PackageName string `json:"package_name"`
PackageVersion string `json:"package_version"`
Scanner string `json:"scanner"` // trivy, osv, etc.
ScannedAt time.Time `json:"scanned_at"`
Status ScanStatus `json:"status"` // clean, vulnerable, error
VulnerabilityCount int `json:"vulnerability_count"`
Vulnerabilities []Vulnerability `json:"vulnerabilities"`
Details map[string]interface{} `json:"details"` // Scanner-specific details
}
// Vulnerability represents a security vulnerability.
// It is the element type of ScanResult.Vulnerabilities.
type Vulnerability struct {
ID string `json:"id"` // CVE-xxx, GHSA-xxx, etc.
Severity string `json:"severity"` // critical, high, moderate, low
Title string `json:"title"`
Description string `json:"description"`
References []string `json:"references"`
FixedIn string `json:"fixed_in"` // Version where fixed
DetectedBy []string `json:"detected_by,omitempty"` // List of scanners that detected this vulnerability; omitted from JSON when empty
}
// NormalizeSeverity normalizes severity names to standard values.
// The input is trimmed of surrounding whitespace and upper-cased, then mapped
// onto one of CRITICAL, HIGH, MODERATE or LOW. MEDIUM is reported as MODERATE,
// and anything unrecognized (including the empty string) becomes LOW.
func NormalizeSeverity(severity string) string {
	switch level := strings.ToUpper(strings.TrimSpace(severity)); level {
	case "MEDIUM":
		// Map MEDIUM to MODERATE for consistency
		return "MODERATE"
	case "CRITICAL", "HIGH", "MODERATE", "LOW":
		return level
	}
	// Default unknown severities to LOW
	return "LOW"
}
// ScanStatus represents scan result status.
// It is a string type; the constants below are its defined values and are
// what ends up in the "status" field of a serialized ScanResult.
type ScanStatus string
const (
ScanStatusClean ScanStatus = "clean"
ScanStatusVulnerable ScanStatus = "vulnerable"
ScanStatusError ScanStatus = "error"
ScanStatusPending ScanStatus = "pending"
)
// Stats represents metadata statistics.
// It is the result type of MetadataStore.GetStats.
type Stats struct {
// Registry is the registry the statistics were requested for.
Registry string `json:"registry"`
TotalPackages int64 `json:"total_packages"`
// TotalSize is a sum of Package.Size values, i.e. bytes.
TotalSize int64 `json:"total_size"`
TotalDownloads int64 `json:"total_downloads"`
ScannedPackages int64 `json:"scanned_packages"`
// NOTE(review): the file-based store never sets VulnerablePackages.
VulnerablePackages int64 `json:"vulnerable_packages"`
LastUpdated time.Time `json:"last_updated"`
}
// TimeSeriesDataPoint represents a single data point in time-series.
// It pairs a timestamp with an integer value and is the element type of
// TimeSeriesStats.DataPoints.
type TimeSeriesDataPoint struct {
Timestamp time.Time `json:"timestamp"`
Value int64 `json:"value"`
}
// TimeSeriesStats represents time-series download statistics.
// It is the result type of MetadataStore.GetTimeSeriesStats.
type TimeSeriesStats struct {
Period string `json:"period"` // 1h, 1day, 7day, 30day
Registry string `json:"registry"` // empty string for all registries
DataPoints []*TimeSeriesDataPoint `json:"data_points"`
}
// CVEBypass represents a temporary bypass for a CVE or package.
// Type says which of the two it is and Target names the CVE or package.
// A bypass has an expiry (ExpiresAt) and can also be switched off through
// Active without being deleted.
type CVEBypass struct {
ID string `json:"id"` // Unique bypass ID
Type BypassType `json:"type"` // cve, package
Target string `json:"target"` // CVE ID (e.g., "CVE-2021-23337") or package (e.g., "npm/lodash@4.17.20")
Reason string `json:"reason"` // Why this bypass was created
CreatedBy string `json:"created_by"` // Admin user who created it
CreatedAt time.Time `json:"created_at"` // When created
ExpiresAt time.Time `json:"expires_at"` // When it expires
AppliesTo string `json:"applies_to,omitempty"` // Optional: limit to specific package (for CVE bypasses)
NotifyOnExpiry bool `json:"notify_on_expiry"` // Send notification when expired
Active bool `json:"active"` // Can be deactivated without deletion
}
// BypassType represents the type of bypass.
// It is a string type; the constants below are its defined values.
type BypassType string
const (
BypassTypeCVE BypassType = "cve" // Bypass specific CVE
BypassTypePackage BypassType = "package" // Bypass entire package
)
// BypassListOptions contains options for listing CVE bypasses.
// It is the opts argument of MetadataStore.ListCVEBypasses. Note that with the
// zero value IncludeExpired is false, so the file-based store hides expired
// bypasses whenever a non-nil options value is passed without setting it.
type BypassListOptions struct {
Type BypassType // Filter by type; the empty value disables the filter in the file-based store
IncludeExpired bool // Include expired bypasses
ActiveOnly bool // Only active bypasses
Limit int // Max results; the file-based store applies it only when > 0
Offset int // Pagination offset
}
// ListOptions contains options for listing packages.
// It is the opts argument of MetadataStore.ListPackages and carries filter,
// sort and pagination settings.
// NOTE(review): which of these fields are honoured depends on the store
// implementation — confirm against the backend in use.
type ListOptions struct {
Registry string // Filter by registry
NamePrefix string // Filter by name prefix
MinSize int64 // Minimum package size
MaxSize int64 // Maximum package size
ScannedOnly bool // Only scanned packages
SinceDate time.Time // Packages cached since date
Limit int // Max results
Offset int // Pagination offset
SortBy string // Sort field (name, size, cached_at, download_count)
SortDesc bool // Sort descending
}
File diff suppressed because it is too large Load Diff