chore(schema): migrate to GORM V2 with multi-database support

- [x] Implement GORM V2 metadata store with SQLite, PostgreSQL, and MySQL support
- [x] Add database migration system using gormigrate for schema versioning
- [x] Create migration CLI tool with support for migrate, rollback, and status commands
- [x] Add Docker support for migration container (Dockerfile.migrate)
- [x] Implement automatic partition management for PostgreSQL time-series tables
- [x] Add background aggregation worker for download statistics
- [x] Support connection pooling configuration (max_open_conns, max_idle_conns, conn_max_lifetime)
- [x] Add blocking mechanism based on vulnerability thresholds in stats and handlers
- [x] Update Helm charts with migration init containers and multi-database configuration
- [x] Replace deprecated SQLite store with optimized GORM implementation
- [x] Add comprehensive integration tests for MySQL and PostgreSQL
- [x] Update frontend to display blocked packages and storage utilization
- [x] Add goreleaser configuration for migrate binary and container image
- [x] Update configuration examples with database backend options and recommendations
This commit is contained in:
2026-01-03 20:44:23 +00:00
parent b129279fb8
commit c0061b99e3
37 changed files with 5711 additions and 1222 deletions
+367
View File
@@ -0,0 +1,367 @@
-- GoHoarder Database Schema V2 - MySQL/MariaDB
-- Optimized for multi-user production deployments
-- Created: 2026-01-03
-- Requires: MySQL 8.0+ or MariaDB 10.5+
-- Set charset and collation
SET NAMES utf8mb4;
SET CHARACTER SET utf8mb4;
-- ============================================================================
-- TABLE: registries
-- Purpose: Normalized registry data (eliminates repeated strings)
-- ============================================================================
CREATE TABLE IF NOT EXISTS registries (
id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
display_name VARCHAR(100) NOT NULL,
upstream_url VARCHAR(512) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
scan_by_default BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
INDEX idx_registry_name (name),
INDEX idx_registry_enabled (enabled, deleted_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: packages
-- Purpose: Core package metadata with denormalized counts for performance
-- ============================================================================
CREATE TABLE IF NOT EXISTS packages (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
registry_id INT UNSIGNED NOT NULL,
name VARCHAR(255) NOT NULL,
version VARCHAR(100) NOT NULL,
-- Storage information
storage_key VARCHAR(512) UNIQUE NOT NULL,
size BIGINT NOT NULL,
checksum_md5 CHAR(32),
checksum_sha256 CHAR(64),
upstream_url VARCHAR(1024),
-- Cache management
cached_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_accessed TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NULL,
access_count BIGINT NOT NULL DEFAULT 0,
-- Security (denormalized for performance)
security_scanned BOOLEAN NOT NULL DEFAULT FALSE,
last_scanned_at TIMESTAMP NULL,
vulnerability_count INT NOT NULL DEFAULT 0,
highest_severity VARCHAR(20),
-- Authentication
requires_auth BOOLEAN NOT NULL DEFAULT FALSE,
auth_provider VARCHAR(50),
-- Audit trail
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE RESTRICT ON UPDATE CASCADE,
UNIQUE INDEX idx_package_registry_name_version (registry_id, name, version, deleted_at),
INDEX idx_package_storage_key (storage_key),
INDEX idx_package_name (name(50)),
INDEX idx_package_last_accessed (last_accessed DESC),
INDEX idx_package_expires_at (expires_at),
INDEX idx_package_access_count (access_count DESC),
INDEX idx_package_size (size DESC),
INDEX idx_package_vuln_count (vulnerability_count),
INDEX idx_package_severity (highest_severity),
INDEX idx_package_security_scanned (security_scanned, deleted_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: package_metadata
-- Purpose: Structured metadata (1:1 with packages)
-- ============================================================================
CREATE TABLE IF NOT EXISTS package_metadata (
package_id BIGINT UNSIGNED PRIMARY KEY,
author VARCHAR(255),
license VARCHAR(100),
homepage VARCHAR(512),
repository VARCHAR(512),
description TEXT,
keywords JSON, -- JSON array for MySQL 8.0+
raw_metadata JSON, -- Full metadata as JSON
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_metadata_author (author(100)),
INDEX idx_metadata_license (license)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: vulnerabilities
-- Purpose: Normalized vulnerability data (each CVE stored once)
-- ============================================================================
CREATE TABLE IF NOT EXISTS vulnerabilities (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
cve_id VARCHAR(50) UNIQUE NOT NULL,
title VARCHAR(512) NOT NULL,
description TEXT,
severity VARCHAR(20) NOT NULL,
cvss FLOAT,
published_at TIMESTAMP NOT NULL,
fixed_version VARCHAR(100),
references JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
UNIQUE INDEX idx_vuln_cve_id (cve_id),
INDEX idx_vuln_severity (severity),
INDEX idx_vuln_cvss (cvss DESC),
INDEX idx_vuln_published (published_at DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: package_vulnerabilities
-- Purpose: Many-to-many relationship between packages and vulnerabilities
-- ============================================================================
CREATE TABLE IF NOT EXISTS package_vulnerabilities (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
package_id BIGINT UNSIGNED NOT NULL,
vulnerability_id BIGINT UNSIGNED NOT NULL,
scanner VARCHAR(50) NOT NULL,
detected_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
bypassed BOOLEAN NOT NULL DEFAULT FALSE,
bypass_id BIGINT UNSIGNED,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (vulnerability_id) REFERENCES vulnerabilities(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_pkg_vuln_package (package_id, deleted_at),
INDEX idx_pkg_vuln_vuln (vulnerability_id, deleted_at),
INDEX idx_pkg_vuln_composite (package_id, vulnerability_id, deleted_at),
INDEX idx_pkg_vuln_scanner (scanner),
INDEX idx_pkg_vuln_bypassed (bypassed)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: scan_results
-- Purpose: Security scan results with severity breakdown
-- ============================================================================
CREATE TABLE IF NOT EXISTS scan_results (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
package_id BIGINT UNSIGNED NOT NULL,
scanner VARCHAR(50) NOT NULL,
scanned_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
status VARCHAR(20) NOT NULL,
vuln_count INT NOT NULL DEFAULT 0,
critical_count INT NOT NULL DEFAULT 0,
high_count INT NOT NULL DEFAULT 0,
medium_count INT NOT NULL DEFAULT 0,
low_count INT NOT NULL DEFAULT 0,
scan_duration INT NOT NULL DEFAULT 0,
details JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_scan_package_scanner (package_id, scanner, deleted_at),
INDEX idx_scan_scanned_at (scanned_at DESC),
INDEX idx_scan_status (status),
INDEX idx_scan_vuln_count (vuln_count)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: cve_bypasses
-- Purpose: CVE bypass rules with usage tracking
-- ============================================================================
CREATE TABLE IF NOT EXISTS cve_bypasses (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
type VARCHAR(20) NOT NULL,
target VARCHAR(512) NOT NULL,
reason TEXT NOT NULL,
created_by VARCHAR(255) NOT NULL,
expires_at TIMESTAMP NOT NULL,
notify_on_expiry BOOLEAN NOT NULL DEFAULT FALSE,
active BOOLEAN NOT NULL DEFAULT TRUE,
usage_count BIGINT NOT NULL DEFAULT 0,
last_used_at TIMESTAMP NULL,
registry_id INT UNSIGNED,
package_id BIGINT UNSIGNED,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE SET NULL ON UPDATE CASCADE,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE SET NULL ON UPDATE CASCADE,
INDEX idx_bypass_type (type),
INDEX idx_bypass_target (target(100)),
INDEX idx_bypass_active (active, deleted_at),
INDEX idx_bypass_expires_at (expires_at, active),
INDEX idx_bypass_created_by (created_by(100))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: download_events
-- Purpose: High-volume time-series data
-- Note: MySQL doesn't support native partitioning as elegantly as PostgreSQL
-- Consider manual partitioning or TimescaleDB if needed
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_events (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
package_id BIGINT UNSIGNED NOT NULL,
registry_id INT UNSIGNED NOT NULL,
downloaded_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
user_agent VARCHAR(512),
ip_address VARCHAR(45),
authenticated BOOLEAN NOT NULL DEFAULT FALSE,
username VARCHAR(255),
INDEX idx_download_events_package (package_id, downloaded_at),
INDEX idx_download_events_registry (registry_id),
INDEX idx_download_events_time (downloaded_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: download_stats_hourly
-- Purpose: Pre-aggregated hourly statistics
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_stats_hourly (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
registry_id INT UNSIGNED NOT NULL,
package_id BIGINT UNSIGNED,
time_bucket TIMESTAMP NOT NULL,
download_count BIGINT NOT NULL DEFAULT 0,
unique_ips BIGINT NOT NULL DEFAULT 0,
auth_downloads BIGINT NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
UNIQUE INDEX idx_stats_hourly_composite (registry_id, IFNULL(package_id, 0), time_bucket),
INDEX idx_stats_hourly_time (time_bucket DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: download_stats_daily
-- Purpose: Pre-aggregated daily statistics
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_stats_daily (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
registry_id INT UNSIGNED NOT NULL,
package_id BIGINT UNSIGNED,
time_bucket TIMESTAMP NOT NULL,
download_count BIGINT NOT NULL DEFAULT 0,
unique_ips BIGINT NOT NULL DEFAULT 0,
auth_downloads BIGINT NOT NULL DEFAULT 0,
top_user_agents JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
UNIQUE INDEX idx_stats_daily_composite (registry_id, IFNULL(package_id, 0), time_bucket),
INDEX idx_stats_daily_time (time_bucket DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: audit_log
-- Purpose: Audit trail for compliance
-- ============================================================================
CREATE TABLE IF NOT EXISTS audit_log (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
entity_type VARCHAR(50) NOT NULL,
entity_id BIGINT NOT NULL,
action VARCHAR(20) NOT NULL,
username VARCHAR(255) NOT NULL,
timestamp TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
changes JSON,
ip_address VARCHAR(45),
user_agent VARCHAR(512),
INDEX idx_audit_log_entity (entity_type, entity_id),
INDEX idx_audit_log_username (username(100)),
INDEX idx_audit_log_timestamp (timestamp DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- SEED DATA: Default registries
-- ============================================================================
INSERT INTO registries (name, display_name, upstream_url, enabled, scan_by_default) VALUES
('npm', 'NPM Registry', 'https://registry.npmjs.org', TRUE, TRUE),
('pypi', 'PyPI', 'https://pypi.org', TRUE, TRUE),
('go', 'Go Modules', 'https://proxy.golang.org', TRUE, TRUE)
ON DUPLICATE KEY UPDATE
display_name = VALUES(display_name),
upstream_url = VALUES(upstream_url);
-- ============================================================================
-- VIEWS: Convenience views for common queries
-- ============================================================================
CREATE OR REPLACE VIEW v_vulnerable_packages AS
SELECT
r.name AS registry,
p.name,
p.version,
p.vulnerability_count,
p.highest_severity,
p.last_scanned_at
FROM packages p
JOIN registries r ON p.registry_id = r.id
WHERE p.vulnerability_count > 0 AND p.deleted_at IS NULL
ORDER BY
CASE p.highest_severity
WHEN 'critical' THEN 1
WHEN 'high' THEN 2
WHEN 'medium' THEN 3
WHEN 'low' THEN 4
ELSE 5
END,
p.vulnerability_count DESC;
-- ============================================================================
-- PERFORMANCE TUNING RECOMMENDATIONS
-- ============================================================================
-- Set InnoDB buffer pool size to 50-70% of RAM
-- SET GLOBAL innodb_buffer_pool_size = 4294967296; -- 4GB
-- Enable query cache (MySQL 5.7 and earlier)
-- SET GLOBAL query_cache_type = 1;
-- SET GLOBAL query_cache_size = 67108864; -- 64MB
-- Optimize for SSD
-- SET GLOBAL innodb_flush_log_at_trx_commit = 2;
-- SET GLOBAL innodb_io_capacity = 2000;
-- ============================================================================
-- COMPLETE
-- ============================================================================
SELECT 'Schema V2 created successfully for MySQL/MariaDB!' AS status;
@@ -0,0 +1,470 @@
-- GoHoarder Database Schema V2 - PostgreSQL
-- Optimized for multi-user production deployments
-- Created: 2026-01-03
-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ============================================================================
-- TABLE: registries
-- Purpose: Normalized registry data (eliminates repeated strings)
-- ============================================================================
CREATE TABLE IF NOT EXISTS registries (
id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
display_name VARCHAR(100) NOT NULL,
upstream_url VARCHAR(512) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
scan_by_default BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
CREATE INDEX idx_registry_name ON registries(name) WHERE deleted_at IS NULL;
CREATE INDEX idx_registry_enabled ON registries(enabled) WHERE enabled = TRUE AND deleted_at IS NULL;
COMMENT ON TABLE registries IS 'Normalized registry data (npm, pypi, go)';
COMMENT ON COLUMN registries.name IS 'Short name: npm, pypi, go';
COMMENT ON COLUMN registries.display_name IS 'Human-readable name: NPM Registry, PyPI';
-- ============================================================================
-- TABLE: packages
-- Purpose: Core package metadata with denormalized counts for performance
-- ============================================================================
CREATE TABLE IF NOT EXISTS packages (
id BIGSERIAL PRIMARY KEY,
registry_id INTEGER NOT NULL REFERENCES registries(id) ON DELETE RESTRICT,
name VARCHAR(255) NOT NULL,
version VARCHAR(100) NOT NULL,
-- Storage information
storage_key VARCHAR(512) UNIQUE NOT NULL,
size BIGINT NOT NULL,
checksum_md5 VARCHAR(32),
checksum_sha256 VARCHAR(64),
upstream_url VARCHAR(1024),
-- Cache management
cached_at TIMESTAMP NOT NULL DEFAULT NOW(),
last_accessed TIMESTAMP NOT NULL DEFAULT NOW(),
expires_at TIMESTAMP,
access_count BIGINT NOT NULL DEFAULT 0,
-- Security (denormalized for performance)
security_scanned BOOLEAN NOT NULL DEFAULT FALSE,
last_scanned_at TIMESTAMP,
vulnerability_count INTEGER NOT NULL DEFAULT 0,
highest_severity VARCHAR(20), -- critical, high, medium, low, none
-- Authentication
requires_auth BOOLEAN NOT NULL DEFAULT FALSE,
auth_provider VARCHAR(50),
-- Audit trail
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
-- Composite indexes for common queries
CREATE UNIQUE INDEX idx_package_registry_name_version
ON packages(registry_id, name, version) WHERE deleted_at IS NULL;
CREATE INDEX idx_package_storage_key ON packages(storage_key);
CREATE INDEX idx_package_name ON packages(name text_pattern_ops) WHERE deleted_at IS NULL;
CREATE INDEX idx_package_last_accessed ON packages(last_accessed DESC) WHERE deleted_at IS NULL;
CREATE INDEX idx_package_expires_at ON packages(expires_at) WHERE expires_at IS NOT NULL AND deleted_at IS NULL;
CREATE INDEX idx_package_access_count ON packages(access_count DESC) WHERE deleted_at IS NULL;
CREATE INDEX idx_package_size ON packages(size DESC);
-- Partial indexes for security queries
CREATE INDEX idx_package_vuln_count ON packages(vulnerability_count) WHERE vulnerability_count > 0 AND deleted_at IS NULL;
CREATE INDEX idx_package_severity ON packages(highest_severity) WHERE highest_severity IN ('critical', 'high') AND deleted_at IS NULL;
CREATE INDEX idx_package_security_scanned ON packages(security_scanned) WHERE deleted_at IS NULL;
COMMENT ON TABLE packages IS 'Core package metadata (optimized V2 schema)';
COMMENT ON COLUMN packages.access_count IS 'Total downloads (denormalized from stats)';
COMMENT ON COLUMN packages.vulnerability_count IS 'Number of vulnerabilities (denormalized)';
-- ============================================================================
-- TABLE: package_metadata
-- Purpose: Structured metadata (1:1 with packages, reduces main table size)
-- ============================================================================
CREATE TABLE IF NOT EXISTS package_metadata (
package_id BIGINT PRIMARY KEY REFERENCES packages(id) ON DELETE CASCADE,
author VARCHAR(255),
license VARCHAR(100),
homepage VARCHAR(512),
repository VARCHAR(512),
description TEXT,
keywords JSONB, -- Array of keywords
raw_metadata JSONB, -- Full metadata
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
CREATE INDEX idx_metadata_author ON package_metadata(author);
CREATE INDEX idx_metadata_license ON package_metadata(license);
CREATE INDEX idx_metadata_keywords ON package_metadata USING GIN(keywords);
CREATE INDEX idx_metadata_raw ON package_metadata USING GIN(raw_metadata);
COMMENT ON TABLE package_metadata IS 'Structured package metadata (separated for performance)';
-- ============================================================================
-- TABLE: vulnerabilities
-- Purpose: Normalized vulnerability data (each CVE stored once)
-- ============================================================================
CREATE TABLE IF NOT EXISTS vulnerabilities (
id BIGSERIAL PRIMARY KEY,
cve_id VARCHAR(50) UNIQUE NOT NULL,
title VARCHAR(512) NOT NULL,
description TEXT,
severity VARCHAR(20) NOT NULL, -- critical, high, medium, low
cvss REAL,
published_at TIMESTAMP NOT NULL,
fixed_version VARCHAR(100),
references JSONB, -- Array of URLs
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
CREATE UNIQUE INDEX idx_vuln_cve_id ON vulnerabilities(cve_id);
CREATE INDEX idx_vuln_severity ON vulnerabilities(severity);
CREATE INDEX idx_vuln_cvss ON vulnerabilities(cvss DESC NULLS LAST);
CREATE INDEX idx_vuln_published ON vulnerabilities(published_at DESC);
COMMENT ON TABLE vulnerabilities IS 'Normalized vulnerability data (99% storage reduction)';
-- ============================================================================
-- TABLE: package_vulnerabilities
-- Purpose: Many-to-many relationship between packages and vulnerabilities
-- ============================================================================
CREATE TABLE IF NOT EXISTS package_vulnerabilities (
id BIGSERIAL PRIMARY KEY,
package_id BIGINT NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
vulnerability_id BIGINT NOT NULL REFERENCES vulnerabilities(id) ON DELETE CASCADE,
scanner VARCHAR(50) NOT NULL,
detected_at TIMESTAMP NOT NULL DEFAULT NOW(),
bypassed BOOLEAN NOT NULL DEFAULT FALSE,
bypass_id BIGINT, -- References cve_bypasses.id (soft reference)
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
CREATE INDEX idx_pkg_vuln_package ON package_vulnerabilities(package_id) WHERE deleted_at IS NULL;
CREATE INDEX idx_pkg_vuln_vuln ON package_vulnerabilities(vulnerability_id) WHERE deleted_at IS NULL;
CREATE INDEX idx_pkg_vuln_composite ON package_vulnerabilities(package_id, vulnerability_id) WHERE deleted_at IS NULL;
CREATE INDEX idx_pkg_vuln_scanner ON package_vulnerabilities(scanner);
CREATE INDEX idx_pkg_vuln_bypassed ON package_vulnerabilities(bypassed) WHERE bypassed = FALSE;
-- ============================================================================
-- TABLE: scan_results
-- Purpose: Security scan results with severity breakdown
-- ============================================================================
CREATE TABLE IF NOT EXISTS scan_results (
id BIGSERIAL PRIMARY KEY,
package_id BIGINT NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
scanner VARCHAR(50) NOT NULL,
scanned_at TIMESTAMP NOT NULL DEFAULT NOW(),
status VARCHAR(20) NOT NULL, -- success, failed, pending
vuln_count INTEGER NOT NULL DEFAULT 0,
critical_count INTEGER NOT NULL DEFAULT 0,
high_count INTEGER NOT NULL DEFAULT 0,
medium_count INTEGER NOT NULL DEFAULT 0,
low_count INTEGER NOT NULL DEFAULT 0,
scan_duration INTEGER NOT NULL DEFAULT 0, -- milliseconds
details JSONB,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
CREATE INDEX idx_scan_package_scanner ON scan_results(package_id, scanner) WHERE deleted_at IS NULL;
CREATE INDEX idx_scan_scanned_at ON scan_results(scanned_at DESC);
CREATE INDEX idx_scan_status ON scan_results(status);
CREATE INDEX idx_scan_vuln_count ON scan_results(vuln_count) WHERE vuln_count > 0;
COMMENT ON TABLE scan_results IS 'Security scan results (optimized V2)';
-- ============================================================================
-- TABLE: cve_bypasses
-- Purpose: CVE bypass rules with usage tracking
-- ============================================================================
CREATE TABLE IF NOT EXISTS cve_bypasses (
id BIGSERIAL PRIMARY KEY,
type VARCHAR(20) NOT NULL, -- cve, package, registry
target VARCHAR(512) NOT NULL,
reason TEXT NOT NULL,
created_by VARCHAR(255) NOT NULL,
expires_at TIMESTAMP NOT NULL,
notify_on_expiry BOOLEAN NOT NULL DEFAULT FALSE,
active BOOLEAN NOT NULL DEFAULT TRUE,
usage_count BIGINT NOT NULL DEFAULT 0,
last_used_at TIMESTAMP,
registry_id INTEGER REFERENCES registries(id),
package_id BIGINT REFERENCES packages(id),
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMP
);
CREATE INDEX idx_bypass_type ON cve_bypasses(type);
CREATE INDEX idx_bypass_target ON cve_bypasses(target);
CREATE INDEX idx_bypass_active ON cve_bypasses(active) WHERE active = TRUE AND deleted_at IS NULL;
CREATE INDEX idx_bypass_expires_at ON cve_bypasses(expires_at) WHERE active = TRUE;
CREATE INDEX idx_bypass_created_by ON cve_bypasses(created_by);
COMMENT ON TABLE cve_bypasses IS 'CVE bypass rules with scope limiting';
-- ============================================================================
-- PARTITIONED TABLE: download_events
-- Purpose: High-volume time-series data (partitioned by month)
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_events (
id BIGSERIAL,
package_id BIGINT NOT NULL,
registry_id INTEGER NOT NULL,
downloaded_at TIMESTAMP NOT NULL,
user_agent VARCHAR(512),
ip_address VARCHAR(45),
authenticated BOOLEAN NOT NULL DEFAULT FALSE,
username VARCHAR(255)
) PARTITION BY RANGE (downloaded_at);
CREATE INDEX idx_download_events_package ON download_events(package_id, downloaded_at);
CREATE INDEX idx_download_events_registry ON download_events(registry_id);
CREATE INDEX idx_download_events_time ON download_events(downloaded_at);
COMMENT ON TABLE download_events IS 'Download events (partitioned by month for performance)';
-- Create partitions for current month ± 2 months
DO $$
DECLARE
start_date DATE;
end_date DATE;
partition_name TEXT;
i INTEGER;
BEGIN
FOR i IN -2..2 LOOP
start_date := date_trunc('month', NOW() + (i || ' months')::INTERVAL)::DATE;
end_date := (start_date + INTERVAL '1 month')::DATE;
partition_name := 'download_events_' || to_char(start_date, 'YYYY_MM');
EXECUTE format('CREATE TABLE IF NOT EXISTS %I PARTITION OF download_events FOR VALUES FROM (%L) TO (%L)',
partition_name, start_date, end_date);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(package_id, downloaded_at)',
partition_name || '_package_idx', partition_name);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(registry_id)',
partition_name || '_registry_idx', partition_name);
END LOOP;
END $$;
-- ============================================================================
-- TABLE: download_stats_hourly
-- Purpose: Pre-aggregated hourly statistics (1000x faster queries)
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_stats_hourly (
id BIGSERIAL PRIMARY KEY,
registry_id INTEGER NOT NULL REFERENCES registries(id),
package_id BIGINT REFERENCES packages(id), -- NULL = all packages in registry
time_bucket TIMESTAMP NOT NULL,
download_count BIGINT NOT NULL DEFAULT 0,
unique_ips BIGINT NOT NULL DEFAULT 0,
auth_downloads BIGINT NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE UNIQUE INDEX idx_stats_hourly_composite
ON download_stats_hourly(registry_id, COALESCE(package_id, 0), time_bucket);
CREATE INDEX idx_stats_hourly_time ON download_stats_hourly(time_bucket DESC);
COMMENT ON TABLE download_stats_hourly IS 'Hourly aggregated stats (pre-computed)';
-- ============================================================================
-- TABLE: download_stats_daily
-- Purpose: Pre-aggregated daily statistics with analytics
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_stats_daily (
id BIGSERIAL PRIMARY KEY,
registry_id INTEGER NOT NULL REFERENCES registries(id),
package_id BIGINT REFERENCES packages(id),
time_bucket TIMESTAMP NOT NULL,
download_count BIGINT NOT NULL DEFAULT 0,
unique_ips BIGINT NOT NULL DEFAULT 0,
auth_downloads BIGINT NOT NULL DEFAULT 0,
top_user_agents JSONB,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE UNIQUE INDEX idx_stats_daily_composite
ON download_stats_daily(registry_id, COALESCE(package_id, 0), time_bucket);
CREATE INDEX idx_stats_daily_time ON download_stats_daily(time_bucket DESC);
COMMENT ON TABLE download_stats_daily IS 'Daily aggregated stats with analytics';
-- ============================================================================
-- PARTITIONED TABLE: audit_log
-- Purpose: Audit trail for compliance (partitioned by month)
-- ============================================================================
CREATE TABLE IF NOT EXISTS audit_log (
id BIGSERIAL,
entity_type VARCHAR(50) NOT NULL,
entity_id BIGINT NOT NULL,
action VARCHAR(20) NOT NULL, -- create, update, delete
username VARCHAR(255) NOT NULL,
timestamp TIMESTAMP NOT NULL DEFAULT NOW(),
changes JSONB,
ip_address VARCHAR(45),
user_agent VARCHAR(512)
) PARTITION BY RANGE (timestamp);
CREATE INDEX idx_audit_log_entity ON audit_log(entity_type, entity_id);
CREATE INDEX idx_audit_log_username ON audit_log(username);
CREATE INDEX idx_audit_log_timestamp ON audit_log(timestamp DESC);
COMMENT ON TABLE audit_log IS 'Audit trail for compliance and debugging';
-- Create audit_log partitions
DO $$
DECLARE
start_date DATE;
end_date DATE;
partition_name TEXT;
i INTEGER;
BEGIN
FOR i IN -1..2 LOOP
start_date := date_trunc('month', NOW() + (i || ' months')::INTERVAL)::DATE;
end_date := (start_date + INTERVAL '1 month')::DATE;
partition_name := 'audit_log_' || to_char(start_date, 'YYYY_MM');
EXECUTE format('CREATE TABLE IF NOT EXISTS %I PARTITION OF audit_log FOR VALUES FROM (%L) TO (%L)',
partition_name, start_date, end_date);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(entity_type, entity_id)',
partition_name || '_entity_idx', partition_name);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(username)',
partition_name || '_user_idx', partition_name);
END LOOP;
END $$;
-- ============================================================================
-- FUNCTIONS: Automatic partition creation
-- ============================================================================
CREATE OR REPLACE FUNCTION create_next_month_partitions()
RETURNS void AS $$
DECLARE
next_month DATE := date_trunc('month', NOW() + INTERVAL '2 months');
partition_name TEXT;
start_date TEXT;
end_date TEXT;
BEGIN
-- Download events partition
partition_name := 'download_events_' || to_char(next_month, 'YYYY_MM');
start_date := to_char(next_month, 'YYYY-MM-DD');
end_date := to_char(next_month + INTERVAL '1 month', 'YYYY-MM-DD');
EXECUTE format('CREATE TABLE IF NOT EXISTS %I PARTITION OF download_events FOR VALUES FROM (%L) TO (%L)',
partition_name, start_date, end_date);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(package_id, downloaded_at)',
partition_name || '_package_idx', partition_name);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(registry_id)',
partition_name || '_registry_idx', partition_name);
-- Audit log partition
partition_name := 'audit_log_' || to_char(next_month, 'YYYY_MM');
EXECUTE format('CREATE TABLE IF NOT EXISTS %I PARTITION OF audit_log FOR VALUES FROM (%L) TO (%L)',
partition_name, start_date, end_date);
EXECUTE format('CREATE INDEX IF NOT EXISTS %I ON %I(entity_type, entity_id)',
partition_name || '_entity_idx', partition_name);
RAISE NOTICE 'Created partitions for %', to_char(next_month, 'YYYY-MM');
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION create_next_month_partitions() IS 'Auto-create partitions for next month';
-- ============================================================================
-- TRIGGERS: Updated_at timestamp
-- ============================================================================
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_registries_updated_at BEFORE UPDATE ON registries
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_packages_updated_at BEFORE UPDATE ON packages
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_package_metadata_updated_at BEFORE UPDATE ON package_metadata
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- ============================================================================
-- SEED DATA: Default registries
-- ============================================================================
INSERT INTO registries (name, display_name, upstream_url, enabled, scan_by_default) VALUES
('npm', 'NPM Registry', 'https://registry.npmjs.org', TRUE, TRUE),
('pypi', 'PyPI', 'https://pypi.org', TRUE, TRUE),
('go', 'Go Modules', 'https://proxy.golang.org', TRUE, TRUE)
ON CONFLICT (name) DO NOTHING;
-- ============================================================================
-- VIEWS: Convenience views for common queries
-- ============================================================================
CREATE OR REPLACE VIEW v_vulnerable_packages AS
SELECT
r.name AS registry,
p.name,
p.version,
p.vulnerability_count,
p.highest_severity,
p.last_scanned_at
FROM packages p
JOIN registries r ON p.registry_id = r.id
WHERE p.vulnerability_count > 0 AND p.deleted_at IS NULL
ORDER BY
CASE p.highest_severity
WHEN 'critical' THEN 1
WHEN 'high' THEN 2
WHEN 'medium' THEN 3
WHEN 'low' THEN 4
ELSE 5
END,
p.vulnerability_count DESC;
COMMENT ON VIEW v_vulnerable_packages IS 'All packages with vulnerabilities (sorted by severity)';
-- ============================================================================
-- COMPLETE
-- ============================================================================
SELECT 'Schema V2 created successfully!' AS status;