Files
gohoarder/migrations/001_create_schema_v2_mysql.sql
T
lukaszraczylo c0061b99e3 chore(schema): migrate to GORM V2 with multi-database support
- [x] Implement GORM V2 metadata store with SQLite, PostgreSQL, and MySQL support
- [x] Add database migration system using gormigrate for schema versioning
- [x] Create migration CLI tool with support for migrate, rollback, and status commands
- [x] Add Docker support for migration container (Dockerfile.migrate)
- [x] Implement automatic partition management for PostgreSQL time-series tables
- [x] Add background aggregation worker for download statistics
- [x] Support connection pooling configuration (max_open_conns, max_idle_conns, conn_max_lifetime)
- [x] Add blocking mechanism based on vulnerability thresholds in stats and handlers
- [x] Update Helm charts with migration init containers and multi-database configuration
- [x] Replace deprecated SQLite store with optimized GORM implementation
- [x] Add comprehensive integration tests for MySQL and PostgreSQL
- [x] Update frontend to display blocked packages and storage utilization
- [x] Add goreleaser configuration for migrate binary and container image
- [x] Update configuration examples with database backend options and recommendations
2026-01-03 20:44:23 +00:00

368 lines
16 KiB
SQL

-- GoHoarder Database Schema V2 - MySQL/MariaDB
-- Optimized for multi-user production deployments
-- Created: 2026-01-03
-- Requires: MySQL 8.0+ or MariaDB 10.5+
-- Set charset and collation
SET NAMES utf8mb4;
SET CHARACTER SET utf8mb4;
-- ============================================================================
-- TABLE: registries
-- Purpose: Normalized registry data (eliminates repeated strings)
-- ============================================================================
CREATE TABLE IF NOT EXISTS registries (
id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
display_name VARCHAR(100) NOT NULL,
upstream_url VARCHAR(512) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
scan_by_default BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
INDEX idx_registry_name (name),
INDEX idx_registry_enabled (enabled, deleted_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: packages
-- Purpose: Core package metadata with denormalized counts for performance
-- ============================================================================
CREATE TABLE IF NOT EXISTS packages (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
registry_id INT UNSIGNED NOT NULL,
name VARCHAR(255) NOT NULL,
version VARCHAR(100) NOT NULL,
-- Storage information
storage_key VARCHAR(512) UNIQUE NOT NULL,
size BIGINT NOT NULL,
checksum_md5 CHAR(32),
checksum_sha256 CHAR(64),
upstream_url VARCHAR(1024),
-- Cache management
cached_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_accessed TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NULL,
access_count BIGINT NOT NULL DEFAULT 0,
-- Security (denormalized for performance)
security_scanned BOOLEAN NOT NULL DEFAULT FALSE,
last_scanned_at TIMESTAMP NULL,
vulnerability_count INT NOT NULL DEFAULT 0,
highest_severity VARCHAR(20),
-- Authentication
requires_auth BOOLEAN NOT NULL DEFAULT FALSE,
auth_provider VARCHAR(50),
-- Audit trail
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE RESTRICT ON UPDATE CASCADE,
UNIQUE INDEX idx_package_registry_name_version (registry_id, name, version, deleted_at),
INDEX idx_package_storage_key (storage_key),
INDEX idx_package_name (name(50)),
INDEX idx_package_last_accessed (last_accessed DESC),
INDEX idx_package_expires_at (expires_at),
INDEX idx_package_access_count (access_count DESC),
INDEX idx_package_size (size DESC),
INDEX idx_package_vuln_count (vulnerability_count),
INDEX idx_package_severity (highest_severity),
INDEX idx_package_security_scanned (security_scanned, deleted_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: package_metadata
-- Purpose: Structured metadata (1:1 with packages)
-- ============================================================================
CREATE TABLE IF NOT EXISTS package_metadata (
package_id BIGINT UNSIGNED PRIMARY KEY,
author VARCHAR(255),
license VARCHAR(100),
homepage VARCHAR(512),
repository VARCHAR(512),
description TEXT,
keywords JSON, -- JSON array for MySQL 8.0+
raw_metadata JSON, -- Full metadata as JSON
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_metadata_author (author(100)),
INDEX idx_metadata_license (license)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: vulnerabilities
-- Purpose: Normalized vulnerability data (each CVE stored once)
-- ============================================================================
CREATE TABLE IF NOT EXISTS vulnerabilities (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
cve_id VARCHAR(50) UNIQUE NOT NULL,
title VARCHAR(512) NOT NULL,
description TEXT,
severity VARCHAR(20) NOT NULL,
cvss FLOAT,
published_at TIMESTAMP NOT NULL,
fixed_version VARCHAR(100),
references JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
UNIQUE INDEX idx_vuln_cve_id (cve_id),
INDEX idx_vuln_severity (severity),
INDEX idx_vuln_cvss (cvss DESC),
INDEX idx_vuln_published (published_at DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: package_vulnerabilities
-- Purpose: Many-to-many relationship between packages and vulnerabilities
-- ============================================================================
CREATE TABLE IF NOT EXISTS package_vulnerabilities (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
package_id BIGINT UNSIGNED NOT NULL,
vulnerability_id BIGINT UNSIGNED NOT NULL,
scanner VARCHAR(50) NOT NULL,
detected_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
bypassed BOOLEAN NOT NULL DEFAULT FALSE,
bypass_id BIGINT UNSIGNED,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (vulnerability_id) REFERENCES vulnerabilities(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_pkg_vuln_package (package_id, deleted_at),
INDEX idx_pkg_vuln_vuln (vulnerability_id, deleted_at),
INDEX idx_pkg_vuln_composite (package_id, vulnerability_id, deleted_at),
INDEX idx_pkg_vuln_scanner (scanner),
INDEX idx_pkg_vuln_bypassed (bypassed)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: scan_results
-- Purpose: Security scan results with severity breakdown
-- ============================================================================
CREATE TABLE IF NOT EXISTS scan_results (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
package_id BIGINT UNSIGNED NOT NULL,
scanner VARCHAR(50) NOT NULL,
scanned_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
status VARCHAR(20) NOT NULL,
vuln_count INT NOT NULL DEFAULT 0,
critical_count INT NOT NULL DEFAULT 0,
high_count INT NOT NULL DEFAULT 0,
medium_count INT NOT NULL DEFAULT 0,
low_count INT NOT NULL DEFAULT 0,
scan_duration INT NOT NULL DEFAULT 0,
details JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_scan_package_scanner (package_id, scanner, deleted_at),
INDEX idx_scan_scanned_at (scanned_at DESC),
INDEX idx_scan_status (status),
INDEX idx_scan_vuln_count (vuln_count)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: cve_bypasses
-- Purpose: CVE bypass rules with usage tracking
-- ============================================================================
CREATE TABLE IF NOT EXISTS cve_bypasses (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
type VARCHAR(20) NOT NULL,
target VARCHAR(512) NOT NULL,
reason TEXT NOT NULL,
created_by VARCHAR(255) NOT NULL,
expires_at TIMESTAMP NOT NULL,
notify_on_expiry BOOLEAN NOT NULL DEFAULT FALSE,
active BOOLEAN NOT NULL DEFAULT TRUE,
usage_count BIGINT NOT NULL DEFAULT 0,
last_used_at TIMESTAMP NULL,
registry_id INT UNSIGNED,
package_id BIGINT UNSIGNED,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
deleted_at TIMESTAMP NULL,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE SET NULL ON UPDATE CASCADE,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE SET NULL ON UPDATE CASCADE,
INDEX idx_bypass_type (type),
INDEX idx_bypass_target (target(100)),
INDEX idx_bypass_active (active, deleted_at),
INDEX idx_bypass_expires_at (expires_at, active),
INDEX idx_bypass_created_by (created_by(100))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: download_events
-- Purpose: High-volume time-series data
-- Note: MySQL doesn't support native partitioning as elegantly as PostgreSQL
-- Consider manual partitioning or TimescaleDB if needed
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_events (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
package_id BIGINT UNSIGNED NOT NULL,
registry_id INT UNSIGNED NOT NULL,
downloaded_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
user_agent VARCHAR(512),
ip_address VARCHAR(45),
authenticated BOOLEAN NOT NULL DEFAULT FALSE,
username VARCHAR(255),
INDEX idx_download_events_package (package_id, downloaded_at),
INDEX idx_download_events_registry (registry_id),
INDEX idx_download_events_time (downloaded_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: download_stats_hourly
-- Purpose: Pre-aggregated hourly statistics
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_stats_hourly (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
registry_id INT UNSIGNED NOT NULL,
package_id BIGINT UNSIGNED,
time_bucket TIMESTAMP NOT NULL,
download_count BIGINT NOT NULL DEFAULT 0,
unique_ips BIGINT NOT NULL DEFAULT 0,
auth_downloads BIGINT NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
UNIQUE INDEX idx_stats_hourly_composite (registry_id, IFNULL(package_id, 0), time_bucket),
INDEX idx_stats_hourly_time (time_bucket DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: download_stats_daily
-- Purpose: Pre-aggregated daily statistics
-- ============================================================================
CREATE TABLE IF NOT EXISTS download_stats_daily (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
registry_id INT UNSIGNED NOT NULL,
package_id BIGINT UNSIGNED,
time_bucket TIMESTAMP NOT NULL,
download_count BIGINT NOT NULL DEFAULT 0,
unique_ips BIGINT NOT NULL DEFAULT 0,
auth_downloads BIGINT NOT NULL DEFAULT 0,
top_user_agents JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (package_id) REFERENCES packages(id) ON DELETE CASCADE ON UPDATE CASCADE,
UNIQUE INDEX idx_stats_daily_composite (registry_id, IFNULL(package_id, 0), time_bucket),
INDEX idx_stats_daily_time (time_bucket DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- TABLE: audit_log
-- Purpose: Audit trail for compliance
-- ============================================================================
CREATE TABLE IF NOT EXISTS audit_log (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
entity_type VARCHAR(50) NOT NULL,
entity_id BIGINT NOT NULL,
action VARCHAR(20) NOT NULL,
username VARCHAR(255) NOT NULL,
timestamp TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
changes JSON,
ip_address VARCHAR(45),
user_agent VARCHAR(512),
INDEX idx_audit_log_entity (entity_type, entity_id),
INDEX idx_audit_log_username (username(100)),
INDEX idx_audit_log_timestamp (timestamp DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- ============================================================================
-- SEED DATA: Default registries
-- ============================================================================
INSERT INTO registries (name, display_name, upstream_url, enabled, scan_by_default) VALUES
('npm', 'NPM Registry', 'https://registry.npmjs.org', TRUE, TRUE),
('pypi', 'PyPI', 'https://pypi.org', TRUE, TRUE),
('go', 'Go Modules', 'https://proxy.golang.org', TRUE, TRUE)
ON DUPLICATE KEY UPDATE
display_name = VALUES(display_name),
upstream_url = VALUES(upstream_url);
-- ============================================================================
-- VIEWS: Convenience views for common queries
-- ============================================================================
CREATE OR REPLACE VIEW v_vulnerable_packages AS
SELECT
r.name AS registry,
p.name,
p.version,
p.vulnerability_count,
p.highest_severity,
p.last_scanned_at
FROM packages p
JOIN registries r ON p.registry_id = r.id
WHERE p.vulnerability_count > 0 AND p.deleted_at IS NULL
ORDER BY
CASE p.highest_severity
WHEN 'critical' THEN 1
WHEN 'high' THEN 2
WHEN 'medium' THEN 3
WHEN 'low' THEN 4
ELSE 5
END,
p.vulnerability_count DESC;
-- ============================================================================
-- PERFORMANCE TUNING RECOMMENDATIONS
-- ============================================================================
-- Set InnoDB buffer pool size to 50-70% of RAM
-- SET GLOBAL innodb_buffer_pool_size = 4294967296; -- 4GB
-- Enable query cache (MySQL 5.7 and earlier)
-- SET GLOBAL query_cache_type = 1;
-- SET GLOBAL query_cache_size = 67108864; -- 64MB
-- Optimize for SSD
-- SET GLOBAL innodb_flush_log_at_trx_commit = 2;
-- SET GLOBAL innodb_io_capacity = 2000;
-- ============================================================================
-- COMPLETE
-- ============================================================================
SELECT 'Schema V2 created successfully for MySQL/MariaDB!' AS status;