diff --git a/.github/workflows/autoupdate.yaml b/.github/workflows/autoupdate.yaml
new file mode 100644
index 0000000..dda0328
--- /dev/null
+++ b/.github/workflows/autoupdate.yaml
@@ -0,0 +1,19 @@
+name: Autoupdate go.mod and go.sum
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 3 * * *"
+
+permissions:
+  contents: write
+  actions: write
+  pull-requests: write
+
+jobs:
+  autoupdate:
+    uses: lukaszraczylo/shared-actions/.github/workflows/go-autoupdate.yaml@main
+    with:
+      go-version: ">=1.25"
+      release-workflow: "release.yaml"
+    secrets: inherit
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
new file mode 100644
index 0000000..cd9ba69
--- /dev/null
+++ b/.github/workflows/pr.yaml
@@ -0,0 +1,16 @@
+name: Pull Request
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - "**"
+      - "!main"
+
+jobs:
+  pr-checks:
+    uses: lukaszraczylo/shared-actions/.github/workflows/go-pr.yaml@main
+    with:
+      go-version: "1.25"
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..0e7a2de
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,70 @@
+name: Release
+
+on:
+  workflow_dispatch:
+  push:
+    paths-ignore:
+      - "**.md"
+      - "**/release.yaml"
+      - "frontend/**"
+      - "deployments/**"
+      - "docs/**"
+    branches:
+      - main
+
+permissions:
+  id-token: write
+  contents: write
+  packages: write
+  deployments: write
+
+jobs:
+  release:
+    uses: lukaszraczylo/shared-actions/.github/workflows/go-release.yaml@main
+    with:
+      go-version: "1.25"
+      docker-enabled: true
+    secrets: inherit
+
+  benchmark:
+    name: Publish Benchmarks
+    needs: release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          ref: main
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: "1.25"
+
+      # No explicit `shell:` means Actions runs `bash -e` WITHOUT pipefail, so the
+      # exit status of `go test` would be masked by `tee`; enable pipefail inline.
+      - name: Run benchmarks
+        run: set -o pipefail; go test -bench=. -benchmem ./...
-run=^# | tee output.txt + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: "go" + output-file-path: output.txt + fail-on-alert: true + github-token: ${{ secrets.GITHUB_TOKEN }} + comment-on-alert: true + summary-always: true + auto-push: false + benchmark-data-dir-path: "docs/bench" + + - name: Push benchmark results + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add docs/bench + git diff --staged --quiet || git commit -m "Update benchmark results" + git push origin main diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..99a8d22 --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,205 @@ +version: 2 + +# Project metadata +project_name: gohoarder + +# Pre-release hooks +before: + hooks: + - go mod tidy + # Generate semantic version if not provided via git tag + # This script can be used by CI/CD to inject custom versions + # Usage: export GORELEASER_CURRENT_TAG=$(./script/generate-version.sh) + # - ./script/generate-version.sh + +# Build configuration +builds: + - id: gohoarder + main: ./cmd/gohoarder + binary: gohoarder + env: + - CGO_ENABLED=0 + goos: + - linux + - darwin + - windows + goarch: + - amd64 + - arm64 + ldflags: + - -s -w + - -X github.com/lukaszraczylo/gohoarder/internal/version.Version={{.Version}} + - -X github.com/lukaszraczylo/gohoarder/internal/version.GitCommit={{.ShortCommit}} + - -X github.com/lukaszraczylo/gohoarder/internal/version.BuildTime={{.Date}} + +# Archives for releases +archives: + - id: default + name_template: >- + {{ .ProjectName }}_ + {{- title .Os }}_ + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + formats: + - tar.gz + - zip + format_overrides: + - goos: windows + formats: + - zip + files: + - README.md + - LICENSE + - 
config.yaml.example + +# Checksum +checksum: + name_template: 'checksums.txt' + algorithm: sha256 + +# Snapshot configuration +snapshot: + version_template: "{{ incpatch .Version }}-next" + +# Changelog +changelog: + sort: asc + filters: + exclude: + - '^docs:' + - '^test:' + - '^ci:' + - '^chore:' + - '^Merge' + - '^WIP' + - '^Update go.mod' + - 'README' + +# GitHub release configuration +release: + github: + owner: lukaszraczylo + name: gohoarder + name_template: "version {{.Version}}" + draft: false + prerelease: auto + +# Docker images (v2 - modern syntax) +dockers_v2: + # 1. Application Engine - Main GoHoarder server + - id: gohoarder-server + ids: + - gohoarder + images: + - ghcr.io/lukaszraczylo/gohoarder-server + tags: + - "{{ .Version }}" + - latest + platforms: + - linux/amd64 + - linux/arm64 + dockerfile: Dockerfile.server + labels: + org.opencontainers.image.title: GoHoarder Server + org.opencontainers.image.description: Universal package cache proxy server + org.opencontainers.image.url: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.source: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.version: "{{ .Version }}" + org.opencontainers.image.created: "{{ .Date }}" + org.opencontainers.image.revision: "{{ .FullCommit }}" + extra_files: + - config.yaml.example + + # 2. 
Website - Frontend Dashboard + - id: gohoarder-frontend + ids: + - gohoarder + images: + - ghcr.io/lukaszraczylo/gohoarder-frontend + tags: + - "{{ .Version }}" + - latest + platforms: + - linux/amd64 + - linux/arm64 + dockerfile: Dockerfile.frontend + labels: + org.opencontainers.image.title: GoHoarder Frontend + org.opencontainers.image.description: GoHoarder web dashboard + org.opencontainers.image.url: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.source: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.version: "{{ .Version }}" + org.opencontainers.image.created: "{{ .Date }}" + org.opencontainers.image.revision: "{{ .FullCommit }}" + extra_files: + - frontend + + # 3. Scanning Engine - Background scanner worker + - id: gohoarder-scanner + ids: + - gohoarder + images: + - ghcr.io/lukaszraczylo/gohoarder-scanner + tags: + - "{{ .Version }}" + - latest + platforms: + - linux/amd64 + - linux/arm64 + dockerfile: Dockerfile.scanner + labels: + org.opencontainers.image.title: GoHoarder Scanner + org.opencontainers.image.description: GoHoarder vulnerability scanning engine + org.opencontainers.image.url: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.source: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.version: "{{ .Version }}" + org.opencontainers.image.created: "{{ .Date }}" + org.opencontainers.image.revision: "{{ .FullCommit }}" + extra_files: + - config.yaml.example + + # 4. 
Gateway - Nginx reverse proxy for unified deployment + - id: gohoarder-gateway + ids: + - gohoarder + images: + - ghcr.io/lukaszraczylo/gohoarder-gateway + tags: + - "{{ .Version }}" + - latest + platforms: + - linux/amd64 + - linux/arm64 + dockerfile: Dockerfile.gateway + labels: + org.opencontainers.image.title: GoHoarder Gateway + org.opencontainers.image.description: Nginx reverse proxy for unified GoHoarder deployment + org.opencontainers.image.url: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.source: https://github.com/lukaszraczylo/gohoarder + org.opencontainers.image.version: "{{ .Version }}" + org.opencontainers.image.created: "{{ .Date }}" + org.opencontainers.image.revision: "{{ .FullCommit }}" + +# Artifact signing with cosign +signs: + - cmd: cosign + signature: "${artifact}.sigstore.json" + args: + - sign-blob + - "--bundle=${signature}" + - "${artifact}" + - "--yes" + artifacts: checksum + output: true + +# Docker image signing with cosign +docker_signs: + - cmd: cosign + artifacts: manifests + output: true + args: + - sign + - "${artifact}@${digest}" + - "--yes" diff --git a/Dockerfile.frontend b/Dockerfile.frontend new file mode 100644 index 0000000..c4f3175 --- /dev/null +++ b/Dockerfile.frontend @@ -0,0 +1,100 @@ +# Website - Frontend Dashboard +# Build stage +FROM node:20-alpine AS builder + +WORKDIR /build + +# Copy frontend source +COPY frontend/package.json frontend/pnpm-lock.yaml ./ +COPY frontend/ ./ + +# Install pnpm and dependencies +RUN npm install -g pnpm && \ + pnpm install --frozen-lockfile + +# Build the frontend +RUN pnpm run build + +# Production stage +FROM nginx:alpine + +# Install envsubst for runtime configuration +RUN apk add --no-cache gettext + +# Copy built frontend +COPY --from=builder /build/dist /usr/share/nginx/html + +# Create runtime config injection script +RUN cat > /docker-entrypoint.d/40-inject-config.sh <<'EOF' +#!/bin/sh +set -e + +# Create runtime configuration file +cat > 
/usr/share/nginx/html/config.js < /usr/share/nginx/html/config.tmp.js +mv /usr/share/nginx/html/config.tmp.js /usr/share/nginx/html/config.js + +echo "Runtime configuration injected:" +cat /usr/share/nginx/html/config.js +EOF + +RUN chmod +x /docker-entrypoint.d/40-inject-config.sh + +# Copy nginx configuration +RUN cat > /etc/nginx/conf.d/default.conf <<'EOF' +server { + listen 80; + server_name _; + root /usr/share/nginx/html; + index index.html; + + # Compression + gzip on; + gzip_types text/plain text/css application/json application/javascript text/xml application/xml text/javascript; + + # SPA routing + location / { + try_files $uri $uri/ /index.html; + } + + # Runtime configuration endpoint + location = /config.js { + add_header Cache-Control "no-cache, no-store, must-revalidate"; + add_header Pragma "no-cache"; + add_header Expires "0"; + } + + # Security headers + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-XSS-Protection "1; mode=block" always; + + # Cache static assets + location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ { + expires 1y; + add_header Cache-Control "public, immutable"; + } +} +EOF + +# Expose port +EXPOSE 80 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD wget --quiet --tries=1 --spider http://localhost/ || exit 1 + +# Environment variables with defaults +ENV API_BASE_URL=/api \ + APP_VERSION=unknown \ + APP_NAME=GoHoarder + +CMD ["nginx", "-g", "daemon off;"] diff --git a/Dockerfile.gateway b/Dockerfile.gateway new file mode 100644 index 0000000..4a5a5bd --- /dev/null +++ b/Dockerfile.gateway @@ -0,0 +1,197 @@ +# Gateway - Nginx reverse proxy for unified deployment +# Routes traffic between frontend and backend under single vhost +FROM nginx:alpine + +# Install envsubst for runtime configuration +RUN apk add --no-cache gettext + +# Copy nginx configuration template +COPY <<'EOF' 
/etc/nginx/templates/default.conf.template +# Upstream servers +upstream backend { + server ${BACKEND_HOST}:${BACKEND_PORT}; + keepalive 32; +} + +upstream frontend { + server ${FRONTEND_HOST}:${FRONTEND_PORT}; + keepalive 32; +} + +# Rate limiting zones +limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s; +limit_req_zone $binary_remote_addr zone=download_limit:10m rate=5r/s; + +# Cache configuration +proxy_cache_path /var/cache/nginx/static levels=1:2 keys_zone=static_cache:10m max_size=100m inactive=60m use_temp_path=off; + +server { + listen 80; + server_name ${SERVER_NAME}; + + # Security headers + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-XSS-Protection "1; mode=block" always; + add_header Referrer-Policy "strict-origin-when-cross-origin" always; + + # Client body size for package uploads + client_max_body_size 500M; + client_body_timeout 300s; + + # Logging + access_log /var/log/nginx/access.log combined; + error_log /var/log/nginx/error.log warn; + + # API endpoints - proxy to backend + location /api/ { + # Rate limiting + limit_req zone=api_limit burst=20 nodelay; + + # Proxy settings + proxy_pass http://backend/; + proxy_http_version 1.1; + + # Headers + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Forwarded-Port $server_port; + + # Connection reuse + proxy_set_header Connection ""; + + # Timeouts for long-running operations + proxy_connect_timeout 60s; + proxy_send_timeout 300s; + proxy_read_timeout 300s; + + # Buffer settings + proxy_buffering on; + proxy_buffer_size 4k; + proxy_buffers 8 4k; + proxy_busy_buffers_size 8k; + } + + # Health check endpoint + location /health { + proxy_pass http://backend/health; + proxy_http_version 1.1; + proxy_set_header Connection ""; + 
access_log off; + } + + # Metrics endpoint (optional - may want to restrict access) + location /metrics { + # Uncomment to restrict to internal networks + # allow 10.0.0.0/8; + # allow 172.16.0.0/12; + # allow 192.168.0.0/16; + # deny all; + + proxy_pass http://backend/metrics; + proxy_http_version 1.1; + proxy_set_header Connection ""; + } + + # Package download endpoints with rate limiting + location ~ ^/(npm|pypi|go)/ { + limit_req zone=download_limit burst=10 nodelay; + + proxy_pass http://backend; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Connection ""; + + # Extended timeouts for package downloads + proxy_connect_timeout 60s; + proxy_send_timeout 600s; + proxy_read_timeout 600s; + + # Large buffer for package downloads + proxy_buffering on; + proxy_buffer_size 128k; + proxy_buffers 4 256k; + proxy_busy_buffers_size 256k; + } + + # Frontend - serve SPA + location / { + proxy_pass http://frontend; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Connection ""; + + # Cache static assets + proxy_cache static_cache; + proxy_cache_valid 200 1h; + proxy_cache_bypass $http_cache_control; + add_header X-Cache-Status $upstream_cache_status; + } + + # WebSocket support (if needed for future features) + location /ws/ { + proxy_pass http://backend/ws/; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # WebSocket timeouts + proxy_connect_timeout 7d; + proxy_send_timeout 
7d;
+        proxy_read_timeout 7d;
+    }
+}
+
+# HTTPS server (uncomment and configure SSL certificates)
+# server {
+#     listen 443 ssl http2;
+#     server_name ${SERVER_NAME};
+#
+#     ssl_certificate /etc/nginx/ssl/cert.pem;
+#     ssl_certificate_key /etc/nginx/ssl/key.pem;
+#
+#     # SSL configuration
+#     ssl_protocols TLSv1.2 TLSv1.3;
+#     ssl_ciphers HIGH:!aNULL:!MD5;
+#     ssl_prefer_server_ciphers on;
+#     ssl_session_cache shared:SSL:10m;
+#     ssl_session_timeout 10m;
+#
+#     # Include all location blocks from above
+#     # ... (copy from HTTP server block)
+# }
+EOF
+
+# Create cache directory
+RUN mkdir -p /var/cache/nginx/static && \
+    chown -R nginx:nginx /var/cache/nginx
+
+# Expose port
+EXPOSE 80 443
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD wget --quiet --tries=1 --spider http://localhost/health || exit 1
+
+# Environment variables with defaults
+ENV BACKEND_HOST=gohoarder-server \
+    BACKEND_PORT=8080 \
+    FRONTEND_HOST=gohoarder-frontend \
+    FRONTEND_PORT=80 \
+    SERVER_NAME=_
+
+# Use nginx with template substitution
+CMD ["/bin/sh", "-c", "envsubst '$$BACKEND_HOST $$BACKEND_PORT $$FRONTEND_HOST $$FRONTEND_PORT $$SERVER_NAME' < /etc/nginx/templates/default.conf.template > /etc/nginx/conf.d/default.conf && nginx -g 'daemon off;'"]
diff --git a/Dockerfile.scanner b/Dockerfile.scanner
new file mode 100644
index 0000000..cd98d8c
--- /dev/null
+++ b/Dockerfile.scanner
@@ -0,0 +1,58 @@
+# Scanning Engine - Background Scanner Worker
+FROM alpine:latest
+
+# Install scanning tools and runtime dependencies
+RUN apk add --no-cache \
+    ca-certificates \
+    tzdata \
+    git \
+    curl \
+    wget \
+    bash \
+    && update-ca-certificates
+
+# Install Trivy for container scanning
+# Uses the official installer, which resolves the release asset for the
+# current OS/architecture (assets are versioned and named e.g. Linux-64bit,
+# so a URL built from `uname -s`/`uname -m` does not exist).
+RUN wget -qO - https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | \
+    sh -s -- -b /usr/local/bin
+
+# Install Grype for vulnerability scanning
+RUN wget -qO - https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin
+
+# Create non-root user
+RUN addgroup -g 1000 scanner && \
+    adduser -D -u 1000 -G scanner scanner
+
+# Create necessary directories
+RUN mkdir -p /data/cache /data/scans && \
+    chown -R scanner:scanner /data
+
+# Copy binary
+COPY gohoarder /usr/local/bin/gohoarder
+RUN chmod +x /usr/local/bin/gohoarder
+
+# Copy example config
+COPY config.yaml.example /etc/gohoarder/config.yaml.example
+
+WORKDIR /data
+USER scanner
+
+# Expose metrics port
+EXPOSE 9091
+
+# Health check - exec form must be a bare JSON array (no trailing shell operators)
+HEALTHCHECK --interval=60s --timeout=30s --start-period=10s --retries=3 \
+    CMD ["/usr/local/bin/gohoarder", "version"]
+
+# Environment variables for scanner mode
+ENV SCANNER_MODE=true \
+    SCANNER_WORKERS=4 \
+    SCANNER_INTERVAL=300
+
+# Run the scanner in background mode
+# Note: You may need to add a scanner-specific command to your CLI
+# For now, this assumes the serve command can run in scanner mode
+ENTRYPOINT ["/usr/local/bin/gohoarder"]
+CMD ["serve", "--scanner-only"]
diff --git a/Dockerfile.server b/Dockerfile.server
new file mode 100644
index 0000000..6a8d039
--- /dev/null
+++ b/Dockerfile.server
@@ -0,0 +1,39 @@
+# Application Engine - GoHoarder Server
+FROM alpine:latest
+
+# Install runtime dependencies
+RUN apk add --no-cache \
+    ca-certificates \
+    tzdata \
+    && update-ca-certificates
+
+# Create non-root user
+RUN addgroup -g 1000 gohoarder && \
+    adduser -D -u 1000 -G gohoarder gohoarder
+
+# Create necessary directories
+RUN mkdir -p /data/cache /data/metadata && \
+    chown -R gohoarder:gohoarder /data
+
+# Copy binary
+COPY gohoarder /usr/local/bin/gohoarder
+RUN chmod +x /usr/local/bin/gohoarder
+
+# Copy example config
+COPY config.yaml.example /etc/gohoarder/config.yaml.example
+
+WORKDIR /data
+USER gohoarder
+
+# Expose ports
+# 8080: Main proxy port
+# 9090: Metrics/health port
+EXPOSE 8080 9090
+
+# Health check - exec form must be a bare JSON array (no trailing shell operators)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD ["/usr/local/bin/gohoarder", "version"]
+
+# Run the server
+ENTRYPOINT ["/usr/local/bin/gohoarder"]
+CMD ["serve"]
diff --git a/Makefile b/Makefile
index bd23ec4..19c9b16 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,8 @@ BINARY_NAME=gohoarder
 BINARY_PATH=bin/$(BINARY_NAME)
 CMD_PATH=./cmd/gohoarder
 
-VERSION?=dev
+# Generate semantic version using script, fallback to 'dev' if script fails
+VERSION?=$(shell ./script/generate-version.sh 2>/dev/null || echo "dev")
 GIT_COMMIT=$(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown")
 BUILD_TIME=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ')
 LDFLAGS=-ldflags "-X github.com/lukaszraczylo/gohoarder/internal/version.Version=$(VERSION) \
diff --git a/deployments/kubernetes/configmap-config.yaml b/deployments/kubernetes/configmap-config.yaml
new file mode 100644
index 0000000..d349f70
--- /dev/null
+++ b/deployments/kubernetes/configmap-config.yaml
@@ -0,0 +1,54 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: gohoarder-config
+  namespace: default
+data:
+  config.yaml: |
+    server:
+      host: "0.0.0.0"
+      port: 8080
+      read_timeout: 30s
+      write_timeout: 30s
+
+    cache:
+      max_size_bytes: 10737418240 # 10GB
+      default_ttl: 24h
+      cleanup_interval: 1h
+
+    storage:
+      backend: filesystem
+      path: /var/lib/gohoarder/cache
+
+    metadata:
+      backend: sqlite
+      connection: /var/lib/gohoarder/gohoarder.db
+
+    security:
+      enabled: true
+      providers:
+        - osv
+        - github
+      severity_threshold: medium
+      block_on_vulnerability: false
+      rescan_interval: 24h
+
+    handlers:
+      npm:
+        enabled: true
+        upstream_registry: "https://registry.npmjs.org"
+
+      pypi:
+        enabled: true
+        upstream_index: "https://pypi.org/simple"
+
+      go:
+        enabled: true
+        upstream_proxy: "https://proxy.golang.org"
+        checksum_db: "https://sum.golang.org"
+        # Path to git credentials file (mounted from Secret)
+        git_credentials_file:
/etc/gohoarder/git-credentials.json + + logging: + level: info + format: json diff --git a/deployments/kubernetes/deployment-all-in-one.yaml b/deployments/kubernetes/deployment-all-in-one.yaml new file mode 100644 index 0000000..22eec32 --- /dev/null +++ b/deployments/kubernetes/deployment-all-in-one.yaml @@ -0,0 +1,502 @@ +# GoHoarder - Kubernetes Deployment (All-in-One) +# This manifest deploys all GoHoarder services under a single ingress +# +# Usage: +# kubectl create namespace gohoarder +# kubectl apply -f deployment-all-in-one.yaml -n gohoarder +# +# Prerequisites: +# - Kubernetes 1.19+ +# - Ingress controller (nginx, traefik, etc.) +# - Persistent volume provisioner +# - Optional: cert-manager for TLS certificates + +--- +# Namespace +apiVersion: v1 +kind: Namespace +metadata: + name: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: namespace + +--- +# ConfigMap for application configuration +apiVersion: v1 +kind: ConfigMap +metadata: + name: gohoarder-config + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: config +data: + # Add your configuration here or mount from a file + # config.yaml: | + # server: + # port: 8080 + # ... 
+ +--- +# PersistentVolumeClaim for cache storage +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: gohoarder-cache + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: storage +spec: + accessModes: + - ReadWriteMany # Multiple pods can access for scanner + server + resources: + requests: + storage: 100Gi + # storageClassName: your-storage-class # Specify your storage class + +--- +# PersistentVolumeClaim for metadata storage +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: gohoarder-metadata + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: storage +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + # storageClassName: your-storage-class + +--- +# Deployment - Application Server +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gohoarder-server + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: server +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: server + template: + metadata: + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: server + spec: + containers: + - name: server + image: ghcr.io/lukaszraczylo/gohoarder-server:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 8080 + protocol: TCP + - name: metrics + containerPort: 9090 + protocol: TCP + env: + - name: CONFIG_FILE + value: /config/config.yaml + - name: STORAGE_BACKEND + value: filesystem + - name: STORAGE_PATH + value: /data/cache + - name: DB_PATH + value: /data/metadata/gohoarder.db + - name: LOG_LEVEL + value: info + - name: LOG_FORMAT + value: json + volumeMounts: + - name: cache + mountPath: /data/cache + - name: metadata + mountPath: /data/metadata + - name: config + mountPath: /config + readOnly: true + livenessProbe: + exec: + command: + - /usr/local/bin/gohoarder + - 
version + initialDelaySeconds: 5 + periodSeconds: 30 + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: 2000m + memory: 2Gi + volumes: + - name: cache + persistentVolumeClaim: + claimName: gohoarder-cache + - name: metadata + persistentVolumeClaim: + claimName: gohoarder-metadata + - name: config + configMap: + name: gohoarder-config + +--- +# Service - Application Server +apiVersion: v1 +kind: Service +metadata: + name: gohoarder-server + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: server +spec: + type: ClusterIP + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP + - name: metrics + port: 9090 + targetPort: metrics + protocol: TCP + selector: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: server + +--- +# Deployment - Frontend +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gohoarder-frontend + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: frontend +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: frontend + template: + metadata: + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: frontend + spec: + containers: + - name: frontend + image: ghcr.io/lukaszraczylo/gohoarder-frontend:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 80 + protocol: TCP + env: + - name: API_BASE_URL + value: /api + - name: APP_VERSION + value: "1.0.0" + - name: APP_NAME + value: GoHoarder + livenessProbe: + httpGet: + path: / + port: 80 + initialDelaySeconds: 5 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 80 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 256Mi + +--- +# Service 
- Frontend +apiVersion: v1 +kind: Service +metadata: + name: gohoarder-frontend + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: frontend +spec: + type: ClusterIP + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP + selector: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: frontend + +--- +# Deployment - Scanner (Optional) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gohoarder-scanner + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: scanner +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: scanner + template: + metadata: + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: scanner + spec: + containers: + - name: scanner + image: ghcr.io/lukaszraczylo/gohoarder-scanner:latest + imagePullPolicy: Always + env: + - name: CONFIG_FILE + value: /config/config.yaml + - name: SCANNER_MODE + value: "true" + - name: SCANNER_WORKERS + value: "4" + - name: LOG_LEVEL + value: info + volumeMounts: + - name: cache + mountPath: /data/cache + readOnly: true + - name: metadata + mountPath: /data/metadata + - name: config + mountPath: /config + readOnly: true + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi + volumes: + - name: cache + persistentVolumeClaim: + claimName: gohoarder-cache + - name: metadata + persistentVolumeClaim: + claimName: gohoarder-metadata + - name: config + configMap: + name: gohoarder-config + +--- +# Deployment - Gateway (Nginx Reverse Proxy) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gohoarder-gateway + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: gateway +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: gateway + template: + metadata: + labels: + 
app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: gateway + spec: + containers: + - name: gateway + image: ghcr.io/lukaszraczylo/gohoarder-gateway:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 80 + protocol: TCP + env: + - name: BACKEND_HOST + value: gohoarder-server + - name: BACKEND_PORT + value: "8080" + - name: FRONTEND_HOST + value: gohoarder-frontend + - name: FRONTEND_PORT + value: "80" + - name: SERVER_NAME + value: hoarder.i.raczylo.com + livenessProbe: + httpGet: + path: /health + port: 80 + initialDelaySeconds: 5 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health + port: 80 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 256Mi + +--- +# Service - Gateway +apiVersion: v1 +kind: Service +metadata: + name: gohoarder-gateway + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: gateway +spec: + type: ClusterIP + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP + selector: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: gateway + +--- +# Ingress - Expose via domain +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: gohoarder + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: ingress + annotations: + # Nginx ingress annotations + nginx.ingress.kubernetes.io/proxy-body-size: "500m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "600" + # Enable CORS if needed + # nginx.ingress.kubernetes.io/enable-cors: "true" + # TLS/SSL configuration (uncomment if using cert-manager) + # cert-manager.io/cluster-issuer: "letsencrypt-prod" +spec: + ingressClassName: nginx # Adjust based on your ingress controller + rules: + - host: hoarder.i.raczylo.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: 
gohoarder-gateway + port: + number: 80 + # Uncomment for HTTPS/TLS + # tls: + # - hosts: + # - hoarder.i.raczylo.com + # secretName: gohoarder-tls + +--- +# HorizontalPodAutoscaler - Server +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: gohoarder-server + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: server +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: gohoarder-server + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + +--- +# HorizontalPodAutoscaler - Gateway +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: gohoarder-gateway + namespace: gohoarder + labels: + app.kubernetes.io/name: gohoarder + app.kubernetes.io/component: gateway +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: gohoarder-gateway + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 diff --git a/deployments/kubernetes/deployment.yaml b/deployments/kubernetes/deployment.yaml new file mode 100644 index 0000000..4ee481e --- /dev/null +++ b/deployments/kubernetes/deployment.yaml @@ -0,0 +1,104 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gohoarder + namespace: default + labels: + app: gohoarder +spec: + replicas: 2 + selector: + matchLabels: + app: gohoarder + template: + metadata: + labels: + app: gohoarder + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + + containers: + - name: gohoarder + image: gohoarder:latest + imagePullPolicy: IfNotPresent + + ports: + - name: http + containerPort: 8080 + protocol: TCP + + env: + - name: CONFIG_FILE + value: /etc/gohoarder/config.yaml + + volumeMounts: + # 
Configuration file + - name: config + mountPath: /etc/gohoarder/config.yaml + subPath: config.yaml + readOnly: true + + # Git credentials (pattern-based) + - name: git-credentials + mountPath: /etc/gohoarder/git-credentials.json + subPath: credentials.json + readOnly: true + + # Persistent storage for cache + - name: cache + mountPath: /var/lib/gohoarder/cache + + # Persistent storage for metadata database + - name: metadata + mountPath: /var/lib/gohoarder + + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "2Gi" + cpu: "1000m" + + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + + readinessProbe: + httpGet: + path: /health/ready + port: http + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 3 + + volumes: + # ConfigMap with application configuration + - name: config + configMap: + name: gohoarder-config + + # Secret with git credentials + - name: git-credentials + secret: + secretName: gohoarder-git-credentials + defaultMode: 0400 # Read-only for owner + + # PersistentVolumeClaim for cache + - name: cache + persistentVolumeClaim: + claimName: gohoarder-cache-pvc + + # PersistentVolumeClaim for metadata + - name: metadata + persistentVolumeClaim: + claimName: gohoarder-metadata-pvc diff --git a/deployments/kubernetes/pvc.yaml b/deployments/kubernetes/pvc.yaml new file mode 100644 index 0000000..58c8fe3 --- /dev/null +++ b/deployments/kubernetes/pvc.yaml @@ -0,0 +1,29 @@ +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: gohoarder-cache-pvc + namespace: default +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + # Uncomment and set your storage class if needed + # storageClassName: fast-ssd + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: gohoarder-metadata-pvc + namespace: default +spec: + accessModes: + - ReadWriteOnce + resources: + 
requests: + storage: 5Gi + # Uncomment and set your storage class if needed + # storageClassName: standard diff --git a/deployments/kubernetes/secret-git-credentials.yaml b/deployments/kubernetes/secret-git-credentials.yaml new file mode 100644 index 0000000..b3747cc --- /dev/null +++ b/deployments/kubernetes/secret-git-credentials.yaml @@ -0,0 +1,61 @@ +apiVersion: v1 +kind: Secret +metadata: + name: gohoarder-git-credentials + namespace: default +type: Opaque +stringData: + credentials.json: | + { + "credentials": [ + { + "pattern": "github.com/mycompany/*", + "host": "github.com", + "username": "oauth2", + "token": "ghp_REPLACE_WITH_YOUR_GITHUB_TOKEN", + "fallback": false + }, + { + "pattern": "github.com/external-vendor/*", + "host": "github.com", + "username": "oauth2", + "token": "ghp_REPLACE_WITH_VENDOR_TOKEN", + "fallback": false + }, + { + "pattern": "gitlab.com/backend-team/*", + "host": "gitlab.com", + "username": "oauth2", + "token": "glpat_REPLACE_WITH_GITLAB_TOKEN", + "fallback": false + }, + { + "pattern": "*", + "host": "*", + "username": "oauth2", + "token": "ghp_REPLACE_WITH_DEFAULT_READONLY_TOKEN", + "fallback": true + } + ] + } +--- +# Example using External Secrets Operator (ESO) +# Uncomment and configure if you're using ESO +# apiVersion: external-secrets.io/v1beta1 +# kind: ExternalSecret +# metadata: +# name: gohoarder-git-credentials +# namespace: default +# spec: +# refreshInterval: 1h +# secretStoreRef: +# name: vault-backend # Your SecretStore name +# kind: SecretStore +# target: +# name: gohoarder-git-credentials +# creationPolicy: Owner +# data: +# - secretKey: credentials.json +# remoteRef: +# key: secret/gohoarder/git-credentials +# property: credentials.json diff --git a/deployments/kubernetes/service.yaml b/deployments/kubernetes/service.yaml new file mode 100644 index 0000000..91f0d14 --- /dev/null +++ b/deployments/kubernetes/service.yaml @@ -0,0 +1,44 @@ +apiVersion: v1 +kind: Service +metadata: + name: gohoarder + namespace: 
default + labels: + app: gohoarder +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + app: gohoarder +--- +# Optional: Ingress for external access +# Uncomment and configure based on your ingress controller +# apiVersion: networking.k8s.io/v1 +# kind: Ingress +# metadata: +# name: gohoarder +# namespace: default +# annotations: +# nginx.ingress.kubernetes.io/proxy-body-size: "500m" +# nginx.ingress.kubernetes.io/proxy-read-timeout: "600" +# spec: +# ingressClassName: nginx +# rules: +# - host: gohoarder.example.com +# http: +# paths: +# - path: / +# pathType: Prefix +# backend: +# service: +# name: gohoarder +# port: +# name: http +# tls: +# - hosts: +# - gohoarder.example.com +# secretName: gohoarder-tls diff --git a/docker-compose.example.yaml b/docker-compose.example.yaml new file mode 100644 index 0000000..29446d0 --- /dev/null +++ b/docker-compose.example.yaml @@ -0,0 +1,151 @@ +version: '3.8' + +# GoHoarder - Unified Deployment Example +# This docker-compose file demonstrates deploying all GoHoarder services +# under a single domain using the gateway reverse proxy + +services: + # Backend - Main application server + gohoarder-server: + image: ghcr.io/lukaszraczylo/gohoarder-server:latest + container_name: gohoarder-server + restart: unless-stopped + environment: + # Application configuration + - CONFIG_FILE=/config/config.yaml + # Database + - DB_PATH=/data/metadata/gohoarder.db + # Storage + - STORAGE_BACKEND=filesystem + - STORAGE_PATH=/data/cache + # Security scanning + - ENABLE_SCANNING=true + - SCAN_ON_DOWNLOAD=true + # Logging + - LOG_LEVEL=info + - LOG_FORMAT=json + volumes: + # Configuration + - ./config.yaml:/config/config.yaml:ro + # Data persistence + - gohoarder-cache:/data/cache + - gohoarder-metadata:/data/metadata + networks: + - gohoarder-internal + healthcheck: + test: ["CMD", "/usr/local/bin/gohoarder", "version"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + + # 
Frontend - Web dashboard + gohoarder-frontend: + image: ghcr.io/lukaszraczylo/gohoarder-frontend:latest + container_name: gohoarder-frontend + restart: unless-stopped + environment: + # Runtime configuration - injected into /config.js + - API_BASE_URL=/api + - APP_VERSION=1.0.0 + - APP_NAME=GoHoarder + networks: + - gohoarder-internal + healthcheck: + test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + + # Scanner - Background vulnerability scanner (optional) + gohoarder-scanner: + image: ghcr.io/lukaszraczylo/gohoarder-scanner:latest + container_name: gohoarder-scanner + restart: unless-stopped + environment: + - CONFIG_FILE=/config/config.yaml + - SCANNER_MODE=true + - SCANNER_WORKERS=4 + - SCANNER_INTERVAL=300 + - LOG_LEVEL=info + volumes: + - ./config.yaml:/config/config.yaml:ro + - gohoarder-cache:/data/cache:ro + - gohoarder-metadata:/data/metadata + networks: + - gohoarder-internal + depends_on: + - gohoarder-server + # Uncomment if you want to run scanner separately + # If commented out, scanning happens inline in the server + # profiles: + # - scanner + + # Gateway - Nginx reverse proxy + gohoarder-gateway: + image: ghcr.io/lukaszraczylo/gohoarder-gateway:latest + container_name: gohoarder-gateway + restart: unless-stopped + environment: + # Backend service connection + - BACKEND_HOST=gohoarder-server + - BACKEND_PORT=8080 + # Frontend service connection + - FRONTEND_HOST=gohoarder-frontend + - FRONTEND_PORT=80 + # Server configuration + - SERVER_NAME=hoarder.i.raczylo.com + ports: + # Map to host port 80 (HTTP) + - "80:80" + # Map to host port 443 (HTTPS) - uncomment if using SSL + # - "443:443" + networks: + - gohoarder-internal + depends_on: + - gohoarder-server + - gohoarder-frontend + # Uncomment if using custom SSL certificates + # volumes: + # - ./ssl/cert.pem:/etc/nginx/ssl/cert.pem:ro + # - ./ssl/key.pem:/etc/nginx/ssl/key.pem:ro + healthcheck: + test: 
["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + +networks: + gohoarder-internal: + driver: bridge + +volumes: + # Persistent storage for cached packages + gohoarder-cache: + driver: local + # Persistent storage for metadata and scan results + gohoarder-metadata: + driver: local + +# Usage: +# 1. Copy this file: cp docker-compose.example.yaml docker-compose.yaml +# 2. Copy config: cp config.yaml.example config.yaml +# 3. Edit config.yaml with your settings +# 4. Start services: docker-compose up -d +# 5. View logs: docker-compose logs -f +# 6. Stop services: docker-compose down +# +# Access: +# - Web UI: http://localhost or http://hoarder.i.raczylo.com +# - API: http://localhost/api or http://hoarder.i.raczylo.com/api +# - Health: http://localhost/health +# - Metrics: http://localhost/metrics +# +# For production: +# - Enable HTTPS in the gateway container +# - Set up proper SSL certificates +# - Configure firewall rules +# - Set appropriate resource limits +# - Enable monitoring and alerting diff --git a/pkg/app/app.go b/pkg/app/app.go index 8698ece..b258c75 100644 --- a/pkg/app/app.go +++ b/pkg/app/app.go @@ -30,6 +30,7 @@ import ( "github.com/lukaszraczylo/gohoarder/pkg/scanner" "github.com/lukaszraczylo/gohoarder/pkg/storage" "github.com/lukaszraczylo/gohoarder/pkg/storage/filesystem" + "github.com/lukaszraczylo/gohoarder/pkg/vcs" "github.com/lukaszraczylo/gohoarder/pkg/websocket" "github.com/rs/zerolog/log" ) @@ -251,9 +252,30 @@ func (a *App) setupServer() error { a.app.All("/api/admin/bypasses/:id?", a.handleAdminBypasses) // Proxy handlers (adapted from net/http) + // Load git credentials if configured + var credStore *vcs.CredentialStore + if a.config.Handlers.Go.GitCredentialsFile != "" { + credStore = vcs.NewCredentialStore() + if err := credStore.LoadFromFile(a.config.Handlers.Go.GitCredentialsFile); err != nil { + log.Error(). + Err(err).
+ Str("file", a.config.Handlers.Go.GitCredentialsFile). + Msg("Failed to load git credentials, continuing without pattern-based credentials") + // Discard the store after a failed load so the handler really runs without it, as the log message says and as the ValidateConfig branch below already does + credStore = nil + } else if err := credStore.ValidateConfig(); err != nil { + log.Error(). + Err(err). + Str("file", a.config.Handlers.Go.GitCredentialsFile). + Msg("Invalid git credentials configuration, continuing without pattern-based credentials") + credStore = nil + } + } + goProxyHandler := goproxy.New(a.cache, a.networkClient, goproxy.Config{ - Upstream: "https://proxy.golang.org", - SumDBURL: "https://sum.golang.org", + Upstream: "https://proxy.golang.org", + SumDBURL: "https://sum.golang.org", + CredStore: credStore, }) a.app.All("/go/*", adaptor.HTTPHandler(http.StripPrefix("/go", goProxyHandler))) diff --git a/pkg/auth/extractor.go b/pkg/auth/extractor.go new file mode 100644 index 0000000..de72269 --- /dev/null +++ b/pkg/auth/extractor.go @@ -0,0 +1,68 @@ +package auth + +import ( + "encoding/base64" + "net/http" + "strings" +) + +// CredentialExtractor extracts authentication credentials from HTTP requests +type CredentialExtractor struct{} + +// NewCredentialExtractor creates a new credential extractor +func NewCredentialExtractor() *CredentialExtractor { + return &CredentialExtractor{} +} + +// Extract extracts authentication credentials from an HTTP request +// Returns the full Authorization header value or constructed auth string +func (e *CredentialExtractor) Extract(r *http.Request) string { + // Try Authorization header first (most common) + if auth := r.Header.Get("Authorization"); auth != "" { + return auth + } + + // Try Basic auth from URL (for PyPI compatibility) + if username, password, ok := r.BasicAuth(); ok { + auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + password)) + return "Basic " + auth + } + + // No credentials found + return "" +} + +// ExtractScheme returns the authentication scheme (Bearer, Basic, Token) +func (e *CredentialExtractor) ExtractScheme(r *http.Request) string { + auth :=
e.Extract(r) + if auth == "" { + return "" + } + + parts := strings.SplitN(auth, " ", 2) + if len(parts) == 2 { + return parts[0] + } + + return "" +} + +// ExtractToken extracts just the token part (without scheme) +func (e *CredentialExtractor) ExtractToken(r *http.Request) string { + auth := e.Extract(r) + if auth == "" { + return "" + } + + // Remove scheme prefix + auth = strings.TrimPrefix(auth, "Bearer ") + auth = strings.TrimPrefix(auth, "Token ") + auth = strings.TrimPrefix(auth, "Basic ") + + return auth +} + +// HasCredentials checks if request has any credentials +func (e *CredentialExtractor) HasCredentials(r *http.Request) bool { + return e.Extract(r) != "" +} diff --git a/pkg/auth/hasher.go b/pkg/auth/hasher.go new file mode 100644 index 0000000..c9bcb17 --- /dev/null +++ b/pkg/auth/hasher.go @@ -0,0 +1,38 @@ +package auth + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" +) + +// CredentialHasher generates hashes of credentials for cache keys +type CredentialHasher struct{} + +// NewCredentialHasher creates a new credential hasher +func NewCredentialHasher() *CredentialHasher { + return &CredentialHasher{} +} + +// Hash generates a short hash of credentials for use in cache keys +// Returns "public" if no credentials provided +func (h *CredentialHasher) Hash(credentials string) string { + if credentials == "" { + return "public" + } + + // Use SHA256 and take first 16 characters (8 bytes) + hash := sha256.Sum256([]byte(credentials)) + return hex.EncodeToString(hash[:8]) +} + +// GenerateCacheKey generates a cache key that includes credential hash +func (h *CredentialHasher) GenerateCacheKey(registry, packageName, version, credentials string) string { + credHash := h.Hash(credentials) + return fmt.Sprintf("%s:%s:%s:%s", registry, packageName, version, credHash) +} + +// IsPublicKey reports whether a cache key ends in the "public" credential hash; keys shorter than that 6-byte suffix are never public (and no longer panic) +func (h *CredentialHasher) IsPublicKey(cacheKey string) bool { + return len(cacheKey) >= 6 &&
cacheKey[len(cacheKey)-6:] == "public" +} diff --git a/pkg/auth/validation_cache.go b/pkg/auth/validation_cache.go new file mode 100644 index 0000000..4b9e316 --- /dev/null +++ b/pkg/auth/validation_cache.go @@ -0,0 +1,109 @@ +package auth + +import ( + "sync" + "time" +) + +// ValidationResult represents a cached credential validation result +type ValidationResult struct { + Allowed bool + ExpiresAt time.Time + Reason string +} + +// ValidationCache caches credential validation results to reduce upstream checks +type ValidationCache struct { + cache map[string]*ValidationResult + mu sync.RWMutex + ttl time.Duration +} + +// NewValidationCache creates a new validation cache +func NewValidationCache(ttl time.Duration) *ValidationCache { + vc := &ValidationCache{ + cache: make(map[string]*ValidationResult), + ttl: ttl, + } + + // Start cleanup goroutine + go vc.cleanupExpired() + + return vc +} + +// Get retrieves a validation result from cache +// Returns (allowed bool, cached bool, reason string) +func (vc *ValidationCache) Get(credHash, packageURL string) (bool, bool, string) { + vc.mu.RLock() + defer vc.mu.RUnlock() + + key := credHash + ":" + packageURL + result, exists := vc.cache[key] + + if !exists { + return false, false, "" + } + + // Check if expired + if time.Now().After(result.ExpiresAt) { + return false, false, "" + } + + return result.Allowed, true, result.Reason +} + +// Set stores a validation result in cache +func (vc *ValidationCache) Set(credHash, packageURL string, allowed bool, reason string) { + vc.mu.Lock() + defer vc.mu.Unlock() + + key := credHash + ":" + packageURL + vc.cache[key] = &ValidationResult{ + Allowed: allowed, + ExpiresAt: time.Now().Add(vc.ttl), + Reason: reason, + } +} + +// Invalidate removes a specific entry from cache +func (vc *ValidationCache) Invalidate(credHash, packageURL string) { + vc.mu.Lock() + defer vc.mu.Unlock() + + key := credHash + ":" + packageURL + delete(vc.cache, key) +} + +// InvalidateAll clears the
entire cache +func (vc *ValidationCache) InvalidateAll() { + vc.mu.Lock() + defer vc.mu.Unlock() + + vc.cache = make(map[string]*ValidationResult) +} + +// Size returns the number of cached entries +func (vc *ValidationCache) Size() int { + vc.mu.RLock() + defer vc.mu.RUnlock() + + return len(vc.cache) +} + +// cleanupExpired removes expired entries periodically +func (vc *ValidationCache) cleanupExpired() { + ticker := time.NewTicker(1 * time.Minute) + defer ticker.Stop() + + for range ticker.C { + vc.mu.Lock() + now := time.Now() + for key, result := range vc.cache { + if now.After(result.ExpiresAt) { + delete(vc.cache, key) + } + } + vc.mu.Unlock() + } +} diff --git a/pkg/auth/validator.go b/pkg/auth/validator.go new file mode 100644 index 0000000..4e244bc --- /dev/null +++ b/pkg/auth/validator.go @@ -0,0 +1,284 @@ +package auth + +import ( + "context" + "fmt" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/rs/zerolog/log" +) + +// CredentialValidator validates credentials with upstream registries +type CredentialValidator interface { + // ValidateAccess checks if credentials grant access to a package + // Returns (allowed bool, error) + ValidateAccess(ctx context.Context, packageURL string, credentials string) (bool, error) +} + +// NPMValidator validates npm registry credentials +type NPMValidator struct { + client *http.Client + timeout time.Duration +} + +// NewNPMValidator creates a new npm credential validator +func NewNPMValidator() *NPMValidator { + return &NPMValidator{ + client: &http.Client{ + Timeout: 5 * time.Second, + }, + timeout: 5 * time.Second, + } +} + +// ValidateAccess validates npm package access using HEAD request +func (v *NPMValidator) ValidateAccess(ctx context.Context, packageURL string, credentials string) (bool, error) { + req, err := http.NewRequestWithContext(ctx, "HEAD", packageURL, nil) + if err != nil { + return false, err + } + + // Add credentials if provided + if credentials != "" { + 
req.Header.Set("Authorization", credentials) + } + + resp, err := v.client.Do(req) + if err != nil { + // Network error - allow cache fallback with warning + log.Warn().Err(err).Str("url", packageURL).Msg("Validation request failed, allowing cache fallback") + return true, fmt.Errorf("validation failed: %w (allowing cache fallback)", err) + } + defer resp.Body.Close() + + // Check status code + switch resp.StatusCode { + case 200, 304: + // Access granted + return true, nil + case 401, 403, 404: + // Access denied + return false, fmt.Errorf("access denied: HTTP %d", resp.StatusCode) + default: + // Unexpected status - allow cache fallback with warning + log.Warn().Int("status", resp.StatusCode).Str("url", packageURL).Msg("Unexpected validation status, allowing cache fallback") + return true, fmt.Errorf("unexpected status %d (allowing cache fallback)", resp.StatusCode) + } +} + +// PyPIValidator validates PyPI registry credentials +type PyPIValidator struct { + client *http.Client + timeout time.Duration +} + +// NewPyPIValidator creates a new PyPI credential validator +func NewPyPIValidator() *PyPIValidator { + return &PyPIValidator{ + client: &http.Client{ + Timeout: 5 * time.Second, + }, + timeout: 5 * time.Second, + } +} + +// ValidateAccess validates PyPI package access using HEAD request +func (v *PyPIValidator) ValidateAccess(ctx context.Context, packageURL string, credentials string) (bool, error) { + req, err := http.NewRequestWithContext(ctx, "HEAD", packageURL, nil) + if err != nil { + return false, err + } + + // Add credentials if provided + if credentials != "" { + req.Header.Set("Authorization", credentials) + } + + resp, err := v.client.Do(req) + if err != nil { + // Network error - allow cache fallback with warning + log.Warn().Err(err).Str("url", packageURL).Msg("Validation request failed, allowing cache fallback") + return true, fmt.Errorf("validation failed: %w (allowing cache fallback)", err) + } + defer resp.Body.Close() + + // Check status 
code + switch resp.StatusCode { + case 200, 304: + // Access granted + return true, nil + case 401, 403, 404: + // Access denied + return false, fmt.Errorf("access denied: HTTP %d", resp.StatusCode) + default: + // Unexpected status - allow cache fallback with warning + log.Warn().Int("status", resp.StatusCode).Str("url", packageURL).Msg("Unexpected validation status, allowing cache fallback") + return true, fmt.Errorf("unexpected status %d (allowing cache fallback)", resp.StatusCode) + } +} + +// GoValidator validates Go module credentials +type GoValidator struct { + timeout time.Duration +} + +// NewGoValidator creates a new Go module credential validator +func NewGoValidator() *GoValidator { + return &GoValidator{ + timeout: 10 * time.Second, + } +} + +// ValidateAccess validates Go module access using git ls-remote +func (v *GoValidator) ValidateAccess(ctx context.Context, modulePath string, credentials string) (bool, error) { + // Create context with timeout + ctx, cancel := context.WithTimeout(ctx, v.timeout) + defer cancel() + + // Determine repository type and validate accordingly + if strings.HasPrefix(modulePath, "github.com/") { + return v.validateGitHub(ctx, modulePath, credentials) + } + + if strings.HasPrefix(modulePath, "gitlab.com/") { + return v.validateGitLab(ctx, modulePath, credentials) + } + + // For other Git providers, use generic git validation + return v.validateGit(ctx, modulePath, credentials) +} + +func (v *GoValidator) validateGitHub(ctx context.Context, modulePath, credentials string) (bool, error) { + // Extract token from credentials + token := strings.TrimPrefix(credentials, "Bearer ") + token = strings.TrimPrefix(token, "Token ") + + if token == "" || token == credentials { + // No token provided or not in expected format + return false, fmt.Errorf("no GitHub token provided") + } + + // Build git URL + repoURL := fmt.Sprintf("https://%s.git", modulePath) + + // Create temporary directory for .netrc + tempDir, err := 
os.MkdirTemp("", "gohoarder-validate-*") + if err != nil { + return false, err + } + defer os.RemoveAll(tempDir) + + // Create .netrc file with credentials + netrcPath := filepath.Join(tempDir, ".netrc") + netrcContent := fmt.Sprintf("machine github.com\nlogin oauth2\npassword %s\n", token) + if err := os.WriteFile(netrcPath, []byte(netrcContent), 0600); err != nil { + return false, err + } + + // Run git ls-remote (lightweight, just checks access) + cmd := exec.CommandContext(ctx, "git", "ls-remote", repoURL, "HEAD") + cmd.Env = append(os.Environ(), + "HOME="+tempDir, // Use temp .netrc + "GIT_TERMINAL_PROMPT=0", // Disable prompts + ) + + output, err := cmd.CombinedOutput() + if err != nil { + // Check error message + errMsg := string(output) + if strings.Contains(errMsg, "could not read Username") || + strings.Contains(errMsg, "Authentication failed") || + strings.Contains(errMsg, "fatal: repository") || + strings.Contains(errMsg, "not found") { + // Access denied + return false, fmt.Errorf("access denied: %s", strings.TrimSpace(errMsg)) + } + + // Other error (network, etc.) 
- allow cache fallback + log.Warn().Err(err).Str("module", modulePath).Msg("Git validation failed, allowing cache fallback") + return true, fmt.Errorf("validation error (allowing cache): %w", err) + } + + // Success - repository accessible + return true, nil +} + +func (v *GoValidator) validateGitLab(ctx context.Context, modulePath, credentials string) (bool, error) { + // Extract token from credentials + token := strings.TrimPrefix(credentials, "Bearer ") + token = strings.TrimPrefix(token, "Token ") + token = strings.TrimPrefix(token, "Private-Token ") + + if token == "" || token == credentials { + // No token provided + return false, fmt.Errorf("no GitLab token provided") + } + + // Build git URL + repoURL := fmt.Sprintf("https://%s.git", modulePath) + + // Create temporary directory for .netrc + tempDir, err := os.MkdirTemp("", "gohoarder-validate-*") + if err != nil { + return false, err + } + defer os.RemoveAll(tempDir) + + // Create .netrc file with credentials + netrcPath := filepath.Join(tempDir, ".netrc") + netrcContent := fmt.Sprintf("machine gitlab.com\nlogin oauth2\npassword %s\n", token) + if err := os.WriteFile(netrcPath, []byte(netrcContent), 0600); err != nil { + return false, err + } + + // Run git ls-remote + cmd := exec.CommandContext(ctx, "git", "ls-remote", repoURL, "HEAD") + cmd.Env = append(os.Environ(), + "HOME="+tempDir, + "GIT_TERMINAL_PROMPT=0", + ) + + output, err := cmd.CombinedOutput() + if err != nil { + errMsg := string(output) + if strings.Contains(errMsg, "could not read Username") || + strings.Contains(errMsg, "Authentication failed") || + strings.Contains(errMsg, "not found") { + return false, fmt.Errorf("access denied: %s", strings.TrimSpace(errMsg)) + } + + log.Warn().Err(err).Str("module", modulePath).Msg("Git validation failed, allowing cache fallback") + return true, fmt.Errorf("validation error (allowing cache): %w", err) + } + + return true, nil +} + +func (v *GoValidator) validateGit(ctx context.Context, modulePath, 
credentials string) (bool, error) { + // Generic git validation for other providers + // Similar to GitHub validation but with generic host detection + repoURL := fmt.Sprintf("https://%s.git", modulePath) + + cmd := exec.CommandContext(ctx, "git", "ls-remote", repoURL, "HEAD") + cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0") + + output, err := cmd.CombinedOutput() + if err != nil { + errMsg := string(output) + if strings.Contains(errMsg, "could not read Username") || + strings.Contains(errMsg, "Authentication failed") || + strings.Contains(errMsg, "not found") { + return false, fmt.Errorf("access denied: %s", strings.TrimSpace(errMsg)) + } + + log.Warn().Err(err).Str("module", modulePath).Msg("Git validation failed, allowing cache fallback") + return true, fmt.Errorf("validation error (allowing cache): %w", err) + } + + return true, nil +} diff --git a/pkg/config/config.go b/pkg/config/config.go index e0e4071..878bb41 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -245,10 +245,11 @@ type HandlersConfig struct { // GoHandlerConfig contains Go proxy configuration type GoHandlerConfig struct { - Enabled bool `mapstructure:"enabled" json:"enabled"` - UpstreamProxy string `mapstructure:"upstream_proxy" json:"upstream_proxy"` - ChecksumDB string `mapstructure:"checksum_db" json:"checksum_db"` - VerifyChecksums bool `mapstructure:"verify_checksums" json:"verify_checksums"` + Enabled bool `mapstructure:"enabled" json:"enabled"` + UpstreamProxy string `mapstructure:"upstream_proxy" json:"upstream_proxy"` + ChecksumDB string `mapstructure:"checksum_db" json:"checksum_db"` + VerifyChecksums bool `mapstructure:"verify_checksums" json:"verify_checksums"` + GitCredentialsFile string `mapstructure:"git_credentials_file" json:"git_credentials_file"` // Path to git credentials JSON file } // NPMHandlerConfig contains NPM registry configuration diff --git a/pkg/metadata/interface.go b/pkg/metadata/interface.go index c0df62c..b038c45 100644 --- 
a/pkg/metadata/interface.go +++ b/pkg/metadata/interface.go @@ -83,6 +83,8 @@ type Package struct { DownloadCount int64 `json:"download_count"` // Download counter Metadata map[string]string `json:"metadata"` // Additional metadata SecurityScanned bool `json:"security_scanned"` // Has been scanned + RequiresAuth bool `json:"requires_auth"` // Package requires authentication + AuthProvider string `json:"auth_provider"` // Auth provider (github.com, npm.pkg.github.com, etc.) } // ScanResult represents a security scan result diff --git a/pkg/metadata/sqlite/sqlite.go b/pkg/metadata/sqlite/sqlite.go index 390880c..159f6e9 100644 --- a/pkg/metadata/sqlite/sqlite.go +++ b/pkg/metadata/sqlite/sqlite.go @@ -46,6 +46,8 @@ CREATE TABLE IF NOT EXISTS packages ( download_count INTEGER DEFAULT 0, metadata TEXT, security_scanned BOOLEAN DEFAULT 0, + requires_auth BOOLEAN DEFAULT 0, + auth_provider TEXT, UNIQUE(registry, name, version) ); @@ -149,11 +151,51 @@ func New(cfg Config) (*SQLiteStore, error) { return nil, errors.Wrap(err, errors.ErrCodeStorageFailure, "failed to create SQLite schema") } + // Run migrations for existing databases + if err := runMigrations(db); err != nil { + db.Close() + return nil, errors.Wrap(err, errors.ErrCodeStorageFailure, "failed to run database migrations") + } + return &SQLiteStore{ db: db, }, nil } +// runMigrations brings databases created by older versions up to the current schema. +func runMigrations(db *sql.DB) error { + // SQLite has no ADD COLUMN IF NOT EXISTS, so each column is probed individually. + // Probing per column lets a migration interrupted between the two ALTERs resume. + columns := []struct{ name, ddl string }{ + {"requires_auth", "ALTER TABLE packages ADD COLUMN requires_auth BOOLEAN DEFAULT 0"}, + {"auth_provider", "ALTER TABLE packages ADD COLUMN auth_provider TEXT"}, + } + + for _, col := range columns { + var exists int + err := db.QueryRow("SELECT COUNT(*) FROM pragma_table_info('packages') WHERE name = ?", col.name).Scan(&exists) + if err != nil { + return fmt.Errorf("failed to inspect packages.%s column: %w", col.name, err) + } + if exists > 0 { + continue + } + + log.Info().Str("column", col.name).Msg("Running migration: adding column to packages table") + if _, err := db.Exec(col.ddl); err != nil { + return fmt.Errorf("failed to add %s column: %w", col.name, err) + } + } + + // CREATE INDEX IF NOT EXISTS is idempotent, so run it unconditionally: databases
+ // created with the columns already in the schema would otherwise never get the index. + if _, err := db.Exec("CREATE INDEX IF NOT EXISTS idx_packages_requires_auth ON packages(requires_auth)"); err != nil { + return fmt.Errorf("failed to create requires_auth index: %w", err) + } + + return nil +} + // SavePackage saves package metadata func (s *SQLiteStore) SavePackage(ctx context.Context, pkg *metadata.Package) error { s.mu.Lock() @@ -175,8 +217,8 @@ func (s *SQLiteStore) SavePackage(ctx context.Context, pkg *metadata.Package) er id, registry, name, version, storage_key, size, checksum_md5, checksum_sha256, upstream_url, cached_at, last_accessed, expires_at, download_count, - metadata, security_scanned - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + metadata, security_scanned, requires_auth, auth_provider + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(registry, name, version) DO UPDATE SET storage_key = excluded.storage_key, size = excluded.size, @@ -186,14 +228,16 @@ func (s *SQLiteStore) SavePackage(ctx context.Context, pkg *metadata.Package) er last_accessed = excluded.last_accessed, expires_at = excluded.expires_at, metadata = excluded.metadata, - security_scanned = excluded.security_scanned + security_scanned = excluded.security_scanned, + requires_auth = excluded.requires_auth, + auth_provider = excluded.auth_provider ` _, err = s.db.ExecContext(ctx, query, pkg.ID, pkg.Registry, pkg.Name, pkg.Version, pkg.StorageKey, pkg.Size, pkg.ChecksumMD5, pkg.ChecksumSHA256, pkg.UpstreamURL, pkg.CachedAt, pkg.LastAccessed, expiresAt, pkg.DownloadCount, - string(metadataJSON), pkg.SecurityScanned, + string(metadataJSON), pkg.SecurityScanned, pkg.RequiresAuth, pkg.AuthProvider, ) if err != nil { @@ -212,7 +256,7 @@ func (s *SQLiteStore) GetPackage(ctx context.Context, registry, name, version st SELECT id, registry, name, version, storage_key, size, checksum_md5, checksum_sha256, upstream_url, cached_at, last_accessed, expires_at, download_count, - metadata, security_scanned + metadata, security_scanned, requires_auth, auth_provider FROM packages WHERE registry = ? AND name = ? AND version = ? 
` @@ -220,12 +264,13 @@ func (s *SQLiteStore) GetPackage(ctx context.Context, registry, name, version st var pkg metadata.Package var metadataJSON string var expiresAt sql.NullTime + var authProvider sql.NullString err := s.db.QueryRowContext(ctx, query, registry, name, version).Scan( &pkg.ID, &pkg.Registry, &pkg.Name, &pkg.Version, &pkg.StorageKey, &pkg.Size, &pkg.ChecksumMD5, &pkg.ChecksumSHA256, &pkg.UpstreamURL, &pkg.CachedAt, &pkg.LastAccessed, &expiresAt, &pkg.DownloadCount, - &metadataJSON, &pkg.SecurityScanned, + &metadataJSON, &pkg.SecurityScanned, &pkg.RequiresAuth, &authProvider, ) if err == sql.ErrNoRows { @@ -240,6 +285,10 @@ func (s *SQLiteStore) GetPackage(ctx context.Context, registry, name, version st pkg.ExpiresAt = &expiresAt.Time } + if authProvider.Valid { + pkg.AuthProvider = authProvider.String + } + // Deserialize metadata if metadataJSON != "" { if err := goccy_json.Unmarshal([]byte(metadataJSON), &pkg.Metadata); err != nil { @@ -516,6 +565,7 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg COUNT(*) as download_count FROM download_events WHERE downloaded_at >= ? + AND downloaded_at IS NOT NULL ` args = []interface{}{timeFormat, startTime} @@ -526,6 +576,7 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg query += ` GROUP BY time_bucket + HAVING time_bucket IS NOT NULL ORDER BY time_bucket ASC ` } else { @@ -535,7 +586,9 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg time_bucket, SUM(download_count) as download_count FROM aggregated_download_stats - WHERE resolution = ? AND time_bucket >= ? + WHERE resolution = ? + AND time_bucket >= ? 
+ AND time_bucket IS NOT NULL ` args = []interface{}{useResolution, startTime} @@ -559,12 +612,15 @@ func (s *SQLiteStore) GetTimeSeriesStats(ctx context.Context, period string, reg // Collect data points dataMap := make(map[string]int64) for rows.Next() { - var bucket string + var bucket sql.NullString var count int64 if err := rows.Scan(&bucket, &count); err != nil { return nil, errors.Wrap(err, errors.ErrCodeStorageFailure, "failed to scan time-series data") } - dataMap[bucket] = count + // Skip NULL buckets (shouldn't happen with NOT NULL constraint, but defensive) + if bucket.Valid && bucket.String != "" { + dataMap[bucket.String] = count + } } if err := rows.Err(); err != nil { @@ -654,7 +710,9 @@ func (s *SQLiteStore) AggregateDownloadData(ctx context.Context) error { COUNT(*) as download_count FROM download_events WHERE downloaded_at < ? + AND downloaded_at IS NOT NULL GROUP BY registry, time_bucket + HAVING time_bucket IS NOT NULL ` _, err = tx.ExecContext(ctx, hourlyAggQuery, oneHourAgo) if err != nil { @@ -680,8 +738,11 @@ func (s *SQLiteStore) AggregateDownloadData(ctx context.Context) error { 'daily' as resolution, SUM(download_count) as download_count FROM aggregated_download_stats - WHERE resolution = 'hourly' AND time_bucket < ? + WHERE resolution = 'hourly' + AND time_bucket < ? 
+ AND time_bucket IS NOT NULL GROUP BY registry, strftime('%Y-%m-%d 00:00:00', time_bucket) + HAVING time_bucket IS NOT NULL ` _, err = tx.ExecContext(ctx, dailyAggQuery, oneDayAgo) if err != nil { diff --git a/pkg/proxy/goproxy/goproxy.go b/pkg/proxy/goproxy/goproxy.go index 16cb4c1..9bcb384 100644 --- a/pkg/proxy/goproxy/goproxy.go +++ b/pkg/proxy/goproxy/goproxy.go @@ -6,25 +6,35 @@ import ( "io" "net/http" "strings" + "time" + "github.com/lukaszraczylo/gohoarder/pkg/auth" "github.com/lukaszraczylo/gohoarder/pkg/cache" "github.com/lukaszraczylo/gohoarder/pkg/errors" "github.com/lukaszraczylo/gohoarder/pkg/network" + "github.com/lukaszraczylo/gohoarder/pkg/vcs" "github.com/rs/zerolog/log" ) // Handler implements the GOPROXY protocol type Handler struct { - cache *cache.Manager - client *network.Client - upstream string - sumDBURL string + cache *cache.Manager + client *network.Client + upstream string + sumDBURL string + credExtractor *auth.CredentialExtractor + credHasher *auth.CredentialHasher + credValidator *auth.GoValidator + validationCache *auth.ValidationCache + gitFetcher *vcs.GitFetcher + moduleBuilder *vcs.ModuleBuilder } // Config holds Go proxy configuration type Config struct { - Upstream string // Upstream Go proxy (e.g., proxy.golang.org) - SumDBURL string // Checksum database URL + Upstream string // Upstream Go proxy (e.g., proxy.golang.org) + SumDBURL string // Checksum database URL + CredStore *vcs.CredentialStore // Optional credential store for git access } // New creates a new Go proxy handler @@ -37,11 +47,23 @@ func New(cacheManager *cache.Manager, client *network.Client, config Config) *Ha config.SumDBURL = "https://sum.golang.org" } + // Use provided credential store or create empty one + credStore := config.CredStore + if credStore == nil { + credStore = vcs.NewCredentialStore() + } + return &Handler{ - cache: cacheManager, - client: client, - upstream: config.Upstream, - sumDBURL: config.SumDBURL, + cache: cacheManager, + client: 
client, + upstream: config.Upstream, + sumDBURL: config.SumDBURL, + credExtractor: auth.NewCredentialExtractor(), + credHasher: auth.NewCredentialHasher(), + credValidator: auth.NewGoValidator(), + validationCache: auth.NewValidationCache(5 * time.Minute), + gitFetcher: vcs.NewGitFetcher("", credStore), + moduleBuilder: vcs.NewModuleBuilder(), } } @@ -88,8 +110,17 @@ func (h *Handler) handleList(ctx context.Context, w http.ResponseWriter, r *http url := h.upstream + path modulePath := h.extractModulePath(path) + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + entry, err := h.cache.Get(ctx, "go", modulePath, "list", func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, url, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + body, statusCode, err := h.client.Get(ctx, url, headers) if err != nil { return nil, "", err } @@ -119,8 +150,17 @@ func (h *Handler) handleInfo(ctx context.Context, w http.ResponseWriter, r *http // Use .info suffix to distinguish from .mod and .zip in cache cacheKey := modulePath + "/@v/" + version + ".info" + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + entry, err := h.cache.Get(ctx, "go", cacheKey, version, func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, url, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + body, statusCode, err := h.client.Get(ctx, url, headers) if err != nil { return nil, "", err } @@ -150,8 +190,17 @@ func (h *Handler) handleMod(ctx context.Context, w http.ResponseWriter, r *http. 
// Use .mod suffix to distinguish from .info and .zip in cache cacheKey := modulePath + "/@v/" + version + ".mod" + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + entry, err := h.cache.Get(ctx, "go", cacheKey, version, func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, url, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + body, statusCode, err := h.client.Get(ctx, url, headers) if err != nil { return nil, "", err } @@ -181,16 +230,55 @@ func (h *Handler) handleZip(ctx context.Context, w http.ResponseWriter, r *http. // Use .zip suffix to distinguish from .info and .mod in cache cacheKey := modulePath + "/@v/" + version + ".zip" + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + credHash := h.credHasher.Hash(credentials) + + log.Debug(). + Str("path", path). + Str("module", modulePath). + Str("version", version). + Str("url", url). + Str("cred_hash", credHash). + Bool("has_credentials", credentials != ""). + Msg("Handling Go module zip request") + entry, err := h.cache.Get(ctx, "go", cacheKey, version, func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, url, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + // Try upstream proxy first (fast path for public modules) + body, statusCode, err := h.client.Get(ctx, url, headers) + if err == nil && statusCode == http.StatusOK { + return body, url, nil + } + + // If upstream failed with 404 or 403, try git fallback (private modules) + if statusCode == http.StatusNotFound || statusCode == http.StatusForbidden { + if body != nil { + body.Close() + } + + log.Debug(). + Str("module", modulePath). + Str("version", version). 
+ Int("upstream_status", statusCode). + Msg("Upstream proxy returned not found, trying git fallback") + + return h.fetchModuleFromGit(ctx, modulePath, version, credentials) + } + + // Other errors + if body != nil { + body.Close() + } if err != nil { return nil, "", err } - if statusCode != http.StatusOK { - body.Close() - return nil, "", fmt.Errorf("upstream returned status %d", statusCode) - } - return body, url, nil + return nil, "", fmt.Errorf("upstream returned status %d", statusCode) }) if err != nil { @@ -208,6 +296,58 @@ func (h *Handler) handleZip(ctx context.Context, w http.ResponseWriter, r *http. } defer entry.Data.Close() + // CRITICAL SECURITY CHECK: If module requires auth, validate credentials + if entry.Package != nil && entry.Package.RequiresAuth { + // Check validation cache first + allowed, cached, reason := h.validationCache.Get(credHash, modulePath) + if cached { + if !allowed { + log.Warn(). + Str("module", modulePath). + Str("version", version). + Str("reason", reason). + Msg("Access denied (cached validation)") + http.Error(w, "Module not found", http.StatusNotFound) + return + } + log.Debug(). + Str("module", modulePath). + Str("version", version). + Msg("Access granted (cached validation)") + } else { + // Validate with upstream using git ls-remote + log.Debug(). + Str("module", modulePath). + Str("version", version). + Str("provider", entry.Package.AuthProvider). + Msg("Validating credentials with upstream") + + allowed, err := h.credValidator.ValidateAccess(ctx, modulePath, credentials) + if err != nil { + reason = err.Error() + } + + // Cache validation result + h.validationCache.Set(credHash, modulePath, allowed, reason) + + if !allowed { + log.Warn(). + Str("module", modulePath). + Str("version", version). + Err(err). + Msg("Access denied by upstream") + // Return 404 (same as GitHub does for private repos) + http.Error(w, "Module not found", http.StatusNotFound) + return + } + + log.Debug(). + Str("module", modulePath). 
+ Str("version", version). + Msg("Access granted by upstream") + } + } + w.Header().Set("Content-Type", "application/zip") io.Copy(w, entry.Data) } @@ -217,8 +357,17 @@ func (h *Handler) handleLatest(ctx context.Context, w http.ResponseWriter, r *ht url := h.upstream + path modulePath := h.extractModulePath(path) + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + entry, err := h.cache.Get(ctx, "go", modulePath, "latest", func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, url, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + body, statusCode, err := h.client.Get(ctx, url, headers) if err != nil { return nil, "", err } @@ -297,3 +446,40 @@ func (h *Handler) extractModulePath(path string) string { // Fallback: remove /@latest suffix if present return strings.TrimSuffix(path, "/@latest") } + +// fetchModuleFromGit fetches a Go module directly from git repository +func (h *Handler) fetchModuleFromGit(ctx context.Context, modulePath, version, credentials string) (io.ReadCloser, string, error) { + log.Info(). + Str("module", modulePath). + Str("version", version). + Msg("Fetching module from git repository") + + // 1. Fetch module source from git + srcPath, err := h.gitFetcher.FetchModule(ctx, modulePath, version, credentials) + if err != nil { + return nil, "", fmt.Errorf("git fetch failed: %w", err) + } + defer h.gitFetcher.Cleanup(srcPath) + + // 2. Validate module + if err := h.moduleBuilder.ValidateModule(ctx, srcPath, modulePath); err != nil { + return nil, "", fmt.Errorf("module validation failed: %w", err) + } + + // 3. 
Build module zip + zipReader, err := h.moduleBuilder.BuildModuleZip(ctx, srcPath, modulePath, version) + if err != nil { + return nil, "", fmt.Errorf("module zip build failed: %w", err) + } + + // Create source URL for logging + sourceURL := fmt.Sprintf("git+https://%s@%s", modulePath, version) + + log.Info(). + Str("module", modulePath). + Str("version", version). + Str("source", sourceURL). + Msg("Successfully built module from git") + + return zipReader, sourceURL, nil +} diff --git a/pkg/proxy/npm/npm.go b/pkg/proxy/npm/npm.go index 504fa43..065dd4e 100644 --- a/pkg/proxy/npm/npm.go +++ b/pkg/proxy/npm/npm.go @@ -8,7 +8,9 @@ import ( "io" "net/http" "strings" + "time" + "github.com/lukaszraczylo/gohoarder/pkg/auth" "github.com/lukaszraczylo/gohoarder/pkg/cache" "github.com/lukaszraczylo/gohoarder/pkg/errors" "github.com/lukaszraczylo/gohoarder/pkg/network" @@ -17,9 +19,13 @@ import ( // Handler implements the NPM registry protocol type Handler struct { - cache *cache.Manager - client *network.Client - upstream string + cache *cache.Manager + client *network.Client + upstream string + credExtractor *auth.CredentialExtractor + credHasher *auth.CredentialHasher + credValidator *auth.NPMValidator + validationCache *auth.ValidationCache } // Config holds NPM proxy configuration @@ -34,9 +40,13 @@ func New(cacheManager *cache.Manager, client *network.Client, config Config) *Ha } return &Handler{ - cache: cacheManager, - client: client, - upstream: config.Upstream, + cache: cacheManager, + client: client, + upstream: config.Upstream, + credExtractor: auth.NewCredentialExtractor(), + credHasher: auth.NewCredentialHasher(), + credValidator: auth.NewNPMValidator(), + validationCache: auth.NewValidationCache(5 * time.Minute), } } @@ -123,6 +133,10 @@ func (h *Handler) handleMetadata(ctx context.Context, w http.ResponseWriter, r * func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) { packageName, version := 
extractTarballInfo(path) + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + credHash := h.credHasher.Hash(credentials) + // Construct proper upstream URL with /-/ format // Format: https://registry.npmjs.org/package/-/package-version.tgz tarballFilename := strings.ReplaceAll(packageName, "/", "-") + "-" + version + ".tgz" @@ -133,10 +147,19 @@ func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *h Str("package", packageName). Str("version", version). Str("upstream_url", url). + Str("cred_hash", credHash). + Bool("has_credentials", credentials != ""). Msg("Handling tarball request") + // Try to get from cache first (with credential-aware key) entry, err := h.cache.Get(ctx, "npm", packageName, version, func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, url, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + body, statusCode, err := h.client.Get(ctx, url, headers) if err != nil { return nil, "", err } @@ -162,6 +185,57 @@ func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *h } defer entry.Data.Close() + // CRITICAL SECURITY CHECK: If package requires auth, validate credentials + if entry.Package != nil && entry.Package.RequiresAuth { + // Check validation cache first + allowed, cached, reason := h.validationCache.Get(credHash, url) + if cached { + if !allowed { + log.Warn(). + Str("package", packageName). + Str("version", version). + Str("reason", reason). + Msg("Access denied (cached validation)") + http.Error(w, "Access denied", http.StatusForbidden) + return + } + log.Debug(). + Str("package", packageName). + Str("version", version). + Msg("Access granted (cached validation)") + } else { + // Validate with upstream + log.Debug(). + Str("package", packageName). + Str("version", version). 
+ Str("provider", entry.Package.AuthProvider). + Msg("Validating credentials with upstream") + + allowed, err := h.credValidator.ValidateAccess(ctx, url, credentials) + if err != nil { + reason = err.Error() + } + + // Cache validation result + h.validationCache.Set(credHash, url, allowed, reason) + + if !allowed { + log.Warn(). + Str("package", packageName). + Str("version", version). + Err(err). + Msg("Access denied by upstream") + http.Error(w, "Access denied", http.StatusForbidden) + return + } + + log.Debug(). + Str("package", packageName). + Str("version", version). + Msg("Access granted by upstream") + } + } + w.Header().Set("Content-Type", "application/octet-stream") io.Copy(w, entry.Data) } diff --git a/pkg/proxy/pypi/pypi.go b/pkg/proxy/pypi/pypi.go index c7d8b5b..8c0100a 100644 --- a/pkg/proxy/pypi/pypi.go +++ b/pkg/proxy/pypi/pypi.go @@ -8,7 +8,9 @@ import ( "net/http" "regexp" "strings" + "time" + "github.com/lukaszraczylo/gohoarder/pkg/auth" "github.com/lukaszraczylo/gohoarder/pkg/cache" "github.com/lukaszraczylo/gohoarder/pkg/errors" "github.com/lukaszraczylo/gohoarder/pkg/network" @@ -17,9 +19,13 @@ import ( // Handler implements the PyPI Simple API (PEP 503) type Handler struct { - cache *cache.Manager - client *network.Client - upstream string + cache *cache.Manager + client *network.Client + upstream string + credExtractor *auth.CredentialExtractor + credHasher *auth.CredentialHasher + credValidator *auth.PyPIValidator + validationCache *auth.ValidationCache } // Config holds PyPI proxy configuration @@ -34,9 +40,13 @@ func New(cacheManager *cache.Manager, client *network.Client, config Config) *Ha } return &Handler{ - cache: cacheManager, - client: client, - upstream: config.Upstream, + cache: cacheManager, + client: client, + upstream: config.Upstream, + credExtractor: auth.NewCredentialExtractor(), + credHasher: auth.NewCredentialHasher(), + credValidator: auth.NewPyPIValidator(), + validationCache: auth.NewValidationCache(5 * time.Minute), } 
} @@ -138,6 +148,10 @@ func (h *Handler) handlePackagePage(ctx context.Context, w http.ResponseWriter, func (h *Handler) handlePackageFile(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) { packageName, version := extractPackageFileInfo(path) + // Extract credentials from request + credentials := h.credExtractor.Extract(r) + credHash := h.credHasher.Hash(credentials) + // Check if we have the original URL from the rewritten package page originalURL := r.URL.Query().Get("original_url") @@ -152,8 +166,23 @@ func (h *Handler) handlePackageFile(ctx context.Context, w http.ResponseWriter, } } + log.Debug(). + Str("path", path). + Str("package", packageName). + Str("version", version). + Str("url", originalURL). + Str("cred_hash", credHash). + Bool("has_credentials", credentials != ""). + Msg("Handling PyPI package file request") + entry, err := h.cache.Get(ctx, "pypi", packageName, version, func(ctx context.Context) (io.ReadCloser, string, error) { - body, statusCode, err := h.client.Get(ctx, originalURL, nil) + // Prepare headers for upstream request + headers := make(map[string]string) + if credentials != "" { + headers["Authorization"] = credentials + } + + body, statusCode, err := h.client.Get(ctx, originalURL, headers) if err != nil { return nil, "", err } @@ -179,6 +208,57 @@ func (h *Handler) handlePackageFile(ctx context.Context, w http.ResponseWriter, } defer entry.Data.Close() + // CRITICAL SECURITY CHECK: If package requires auth, validate credentials + if entry.Package != nil && entry.Package.RequiresAuth { + // Check validation cache first + allowed, cached, reason := h.validationCache.Get(credHash, originalURL) + if cached { + if !allowed { + log.Warn(). + Str("package", packageName). + Str("version", version). + Str("reason", reason). + Msg("Access denied (cached validation)") + http.Error(w, "Access denied", http.StatusForbidden) + return + } + log.Debug(). + Str("package", packageName). + Str("version", version). 
+ Msg("Access granted (cached validation)") + } else { + // Validate with upstream + log.Debug(). + Str("package", packageName). + Str("version", version). + Str("provider", entry.Package.AuthProvider). + Msg("Validating credentials with upstream") + + allowed, err := h.credValidator.ValidateAccess(ctx, originalURL, credentials) + if err != nil { + reason = err.Error() + } + + // Cache validation result + h.validationCache.Set(credHash, originalURL, allowed, reason) + + if !allowed { + log.Warn(). + Str("package", packageName). + Str("version", version). + Err(err). + Msg("Access denied by upstream") + http.Error(w, "Access denied", http.StatusForbidden) + return + } + + log.Debug(). + Str("package", packageName). + Str("version", version). + Msg("Access granted by upstream") + } + } + // Determine content type based on file extension contentType := "application/octet-stream" if strings.HasSuffix(path, ".whl") { diff --git a/pkg/vcs/credentials.go b/pkg/vcs/credentials.go new file mode 100644 index 0000000..c0c9980 --- /dev/null +++ b/pkg/vcs/credentials.go @@ -0,0 +1,247 @@ +package vcs + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/rs/zerolog/log" +) + +// CredentialStore manages git credentials for different repository patterns +type CredentialStore struct { + credentials []CredentialEntry +} + +// CredentialEntry represents credentials for a specific pattern +type CredentialEntry struct { + Pattern string `json:"pattern"` // Glob pattern: "github.com/myorg/*" + Host string `json:"host"` // Git host: "github.com" + Username string `json:"username"` // Usually "oauth2" for tokens + Token string `json:"token"` // Access token + Fallback bool `json:"fallback"` // Use as fallback if no match +} + +// CredentialConfig represents the JSON configuration format +type CredentialConfig struct { + Credentials []CredentialEntry `json:"credentials"` +} + +// NewCredentialStore creates a new credential store +func 
NewCredentialStore() *CredentialStore { + return &CredentialStore{ + credentials: make([]CredentialEntry, 0), + } +} + +// LoadFromFile loads credentials from a JSON file +func (cs *CredentialStore) LoadFromFile(path string) error { + if path == "" { + log.Debug().Msg("No credential file specified, using system git config") + return nil + } + + // Check if file exists + if _, err := os.Stat(path); os.IsNotExist(err) { + log.Warn().Str("path", path).Msg("Credential file not found, using system git config") + return nil + } + + data, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read credential file: %w", err) + } + + var config CredentialConfig + if err := json.Unmarshal(data, &config); err != nil { + return fmt.Errorf("failed to parse credential file: %w", err) + } + + cs.credentials = config.Credentials + + log.Info(). + Str("file", path). + Int("credentials", len(cs.credentials)). + Msg("Loaded git credentials from file") + + // Log patterns (not tokens!) for debugging + for i, cred := range cs.credentials { + log.Debug(). + Int("index", i). + Str("pattern", cred.Pattern). + Str("host", cred.Host). + Bool("fallback", cred.Fallback). 
+ Msg("Registered credential pattern") + } + + return nil +} + +// GetCredentialsForModule finds the best matching credentials for a module path +// Returns (username, token, found) +func (cs *CredentialStore) GetCredentialsForModule(modulePath string) (string, string, bool) { + if len(cs.credentials) == 0 { + // No credentials configured, rely on system git config + return "", "", false + } + + // Find best match + var bestMatch *CredentialEntry + var fallbackMatch *CredentialEntry + bestMatchLen := 0 + + for i := range cs.credentials { + cred := &cs.credentials[i] + + // Check for fallback + if cred.Fallback { + fallbackMatch = cred + continue + } + + // Check if pattern matches + if cs.matchPattern(cred.Pattern, modulePath) { + // Use longest matching pattern (most specific) + if len(cred.Pattern) > bestMatchLen { + bestMatch = cred + bestMatchLen = len(cred.Pattern) + } + } + } + + // Use best match if found + if bestMatch != nil { + log.Debug(). + Str("module", modulePath). + Str("pattern", bestMatch.Pattern). + Str("host", bestMatch.Host). + Msg("Matched credential pattern") + return bestMatch.Username, bestMatch.Token, true + } + + // Use fallback if available + if fallbackMatch != nil { + log.Debug(). + Str("module", modulePath). + Str("pattern", fallbackMatch.Pattern). + Msg("Using fallback credentials") + return fallbackMatch.Username, fallbackMatch.Token, true + } + + // No match found + log.Debug(). + Str("module", modulePath). 
+ Msg("No credential pattern matched, using system git config") + return "", "", false +} + +// matchPattern checks if a module path matches a credential pattern +// Supports glob-style patterns: +// - github.com/myorg/* matches github.com/myorg/repo1, github.com/myorg/repo2 +// - github.com/myorg/repo matches exactly github.com/myorg/repo +// - * matches everything +func (cs *CredentialStore) matchPattern(pattern, modulePath string) bool { + // Exact match + if pattern == modulePath { + return true + } + + // Wildcard match all + if pattern == "*" { + return true + } + + // Glob-style matching + matched, err := filepath.Match(pattern, modulePath) + if err != nil { + log.Warn().Err(err).Str("pattern", pattern).Msg("Invalid pattern") + return false + } + + if matched { + return true + } + + // Prefix matching with /* + if strings.HasSuffix(pattern, "/*") { + prefix := strings.TrimSuffix(pattern, "/*") + return strings.HasPrefix(modulePath, prefix+"/") + } + + return false +} + +// CreateNetrcContent creates .netrc file content for a specific host +func (cs *CredentialStore) CreateNetrcContent(host, username, token string) string { + return fmt.Sprintf("machine %s\nlogin %s\npassword %s\n", host, username, token) +} + +// GetCredentialsForHost finds credentials for a specific git host (e.g., "github.com") +// This is useful when you need credentials for a host but don't have a full module path +func (cs *CredentialStore) GetCredentialsForHost(host string) (string, string, bool) { + if len(cs.credentials) == 0 { + return "", "", false + } + + // Look for exact host match first + for i := range cs.credentials { + cred := &cs.credentials[i] + if cred.Host == host && !cred.Fallback { + log.Debug(). + Str("host", host). + Str("pattern", cred.Pattern). + Msg("Found credentials for host") + return cred.Username, cred.Token, true + } + } + + // Try fallback + for i := range cs.credentials { + cred := &cs.credentials[i] + if cred.Fallback { + log.Debug(). + Str("host", host). 
+ Msg("Using fallback credentials for host") + return cred.Username, cred.Token, true + } + } + + return "", "", false +} + +// ValidateConfig validates the credential configuration +func (cs *CredentialStore) ValidateConfig() error { + hostPatterns := make(map[string]bool) + + for i, cred := range cs.credentials { + // Check required fields + if cred.Pattern == "" { + return fmt.Errorf("credential entry %d: pattern is required", i) + } + if cred.Host == "" && cred.Pattern != "*" { + return fmt.Errorf("credential entry %d: host is required (pattern: %s)", i, cred.Pattern) + } + if cred.Token == "" { + return fmt.Errorf("credential entry %d: token is required (pattern: %s)", i, cred.Pattern) + } + + // Set default username if not provided + if cred.Username == "" { + cs.credentials[i].Username = "oauth2" + } + + // Check for duplicate patterns + key := cred.Pattern + ":" + cred.Host + if hostPatterns[key] && !cred.Fallback { + log.Warn(). + Str("pattern", cred.Pattern). + Str("host", cred.Host). 
+ Msg("Duplicate credential pattern, last one wins") + } + hostPatterns[key] = true + } + + return nil +} diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go new file mode 100644 index 0000000..9ef38a2 --- /dev/null +++ b/pkg/vcs/git.go @@ -0,0 +1,283 @@ +package vcs + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/rs/zerolog/log" +) + +// GitFetcher handles git repository operations +type GitFetcher struct { + workDir string + timeout time.Duration + credStore *CredentialStore +} + +// NewGitFetcher creates a new git fetcher +func NewGitFetcher(workDir string, credStore *CredentialStore) *GitFetcher { + if workDir == "" { + workDir = os.TempDir() + } + + if credStore == nil { + credStore = NewCredentialStore() + } + + return &GitFetcher{ + workDir: workDir, + timeout: 30 * time.Second, + credStore: credStore, + } +} + +// FetchModule clones a git repository and checks out a specific version +// Returns the path to the checked-out source directory +func (g *GitFetcher) FetchModule(ctx context.Context, modulePath, version, credentials string) (string, error) { + // Create context with timeout + ctx, cancel := context.WithTimeout(ctx, g.timeout) + defer cancel() + + // Parse module path to extract repository URL + repoURL, err := g.modulePathToRepoURL(modulePath) + if err != nil { + return "", err + } + + // Create temporary directory for this clone + cloneDir, err := os.MkdirTemp(g.workDir, "gohoarder-git-*") + if err != nil { + return "", fmt.Errorf("failed to create temp directory: %w", err) + } + + log.Debug(). + Str("module", modulePath). + Str("version", version). + Str("repo_url", repoURL). + Str("clone_dir", cloneDir). 
+ Msg("Fetching module from git") + + // Set up credentials + credentialHelper, cleanup, err := g.setupCredentials(repoURL, modulePath, credentials) + if err != nil { + os.RemoveAll(cloneDir) + return "", fmt.Errorf("failed to setup credentials: %w", err) + } + defer cleanup() + + // Try shallow clone with specific version first (fastest) + if err := g.shallowClone(ctx, repoURL, version, cloneDir, credentialHelper); err != nil { + log.Debug().Err(err).Msg("Shallow clone failed, trying full clone") + + // Fallback to full clone + if err := g.fullClone(ctx, repoURL, cloneDir, credentialHelper); err != nil { + os.RemoveAll(cloneDir) + return "", fmt.Errorf("git clone failed: %w", err) + } + + // Checkout specific version + if err := g.checkout(ctx, cloneDir, version); err != nil { + os.RemoveAll(cloneDir) + return "", fmt.Errorf("git checkout failed: %w", err) + } + } + + log.Debug(). + Str("module", modulePath). + Str("version", version). + Str("path", cloneDir). + Msg("Successfully fetched module from git") + + return cloneDir, nil +} + +// modulePathToRepoURL converts a Go module path to a git repository URL +// Examples: +// github.com/user/repo → https://github.com/user/repo.git +// gitlab.com/group/project → https://gitlab.com/group/project.git +func (g *GitFetcher) modulePathToRepoURL(modulePath string) (string, error) { + // Remove any path components after the repository + // e.g., github.com/user/repo/v2 → github.com/user/repo + parts := strings.Split(modulePath, "/") + if len(parts) < 3 { + return "", fmt.Errorf("invalid module path: %s", modulePath) + } + + // For github.com, gitlab.com, bitbucket.org, etc. 
+ // Format: host/owner/repo + host := parts[0] + owner := parts[1] + repo := parts[2] + + // Remove version suffix if present (e.g., /v2, /v3) + repo = strings.TrimPrefix(repo, "v") + if strings.HasPrefix(repo, "2") || strings.HasPrefix(repo, "3") { + // This might be a version suffix, but we need to be careful + // For now, keep it as-is + } + + repoURL := fmt.Sprintf("https://%s/%s/%s.git", host, owner, repo) + return repoURL, nil +} + +// setupCredentials configures git credentials for authentication +// Returns credential helper configuration and cleanup function +func (g *GitFetcher) setupCredentials(repoURL, modulePath, credentials string) (map[string]string, func(), error) { + env := make(map[string]string) + cleanup := func() {} + + // Priority 1: Check credential store for pattern-based credentials + if g.credStore != nil { + username, token, found := g.credStore.GetCredentialsForModule(modulePath) + if found { + log.Debug(). + Str("module", modulePath). + Msg("Using credentials from credential store") + return g.createTempNetrc(repoURL, username, token) + } + } + + // Priority 2: Use credentials from HTTP Authorization header (if provided) + if credentials != "" { + log.Debug().Msg("Using credentials from Authorization header") + return g.createTempNetrcFromHeader(repoURL, credentials) + } + + // Priority 3: Rely on system git config (.netrc, etc.) 
+	log.Debug().Msg("No credentials provided, using system git config")
+	return env, cleanup, nil
+}
+
+// createTempNetrc writes a throwaway .netrc containing the given credentials
+// and returns the environment overrides that point HOME at it.
+//
+// The .netrc is created with mode 0600 inside a fresh temporary directory. The
+// returned map sets HOME to that directory and GIT_TERMINAL_PROMPT to "0". The
+// returned cleanup function removes the directory and should be called once
+// the git command that needed the credentials has finished.
+func (g *GitFetcher) createTempNetrc(repoURL, username, token string) (map[string]string, func(), error) {
+	// Create temporary .netrc file
+	tempDir, err := os.MkdirTemp("", "gohoarder-netrc-*")
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to create temp netrc directory: %w", err)
+	}
+
+	// Extract host from repo URL
+	host := g.extractHost(repoURL)
+
+	// Create .netrc file
+	netrcPath := filepath.Join(tempDir, ".netrc")
+	netrcContent := fmt.Sprintf("machine %s\nlogin %s\npassword %s\n", host, username, token)
+	if err := os.WriteFile(netrcPath, []byte(netrcContent), 0600); err != nil {
+		// Do not leave the half-initialised directory behind
+		os.RemoveAll(tempDir)
+		return nil, nil, fmt.Errorf("failed to write .netrc: %w", err)
+	}
+
+	env := map[string]string{
+		"HOME":                tempDir,
+		"GIT_TERMINAL_PROMPT": "0",
+	}
+
+	cleanup := func() {
+		os.RemoveAll(tempDir)
+	}
+
+	log.Debug().Str("host", host).Msg("Created temporary .netrc for git authentication")
+
+	return env, cleanup, nil
+}
+
+// createTempNetrcFromHeader creates a temporary .netrc from Authorization header credentials.
+//
+// The "Bearer ", "Token " and "Private-Token " prefixes are stripped to obtain
+// the token. If none of them is present, or the remaining token is empty, an
+// empty environment and a no-op cleanup are returned so that git falls back
+// to whatever the system is configured with.
+func (g *GitFetcher) createTempNetrcFromHeader(repoURL, credentials string) (map[string]string, func(), error) {
+	// Extract token from credentials
+	token := strings.TrimPrefix(credentials, "Bearer ")
+	token = strings.TrimPrefix(token, "Token ")
+	token = strings.TrimPrefix(token, "Private-Token ")
+
+	if token == "" || token == credentials {
+		// Not in expected format, rely on system config
+		log.Debug().Msg("Credentials not in Bearer/Token format, using system git config")
+		return make(map[string]string), func() {}, nil
+	}
+
+	// Use oauth2 as default username for token-based auth
+	return g.createTempNetrc(repoURL, "oauth2", token)
+}
+
+// extractHost returns the bare host name of a repository URL, for use as the
+// "machine" of a .netrc entry.
+//
+// Scheme, userinfo, port and path are stripped, so
+// "https://user@git.example.com:8443/org/repo.git" yields "git.example.com".
+// scp-like addresses ("git@host:org/repo.git") and scheme-less paths
+// ("github.com/org/repo") are handled as well. The host is taken from the URL
+// structure rather than by substring search, so "my-gitlab.com" is not
+// mistaken for "gitlab.com". An empty string is returned when the input has no
+// host part (for example a local path).
+func (g *GitFetcher) extractHost(repoURL string) string {
+	rest := strings.TrimSpace(repoURL)
+
+	// Drop the scheme ("https://", "ssh://", ...), if present
+	if _, after, found := strings.Cut(rest, "://"); found {
+		rest = after
+	}
+
+	// Keep only the authority: everything before the path, query or fragment
+	if i := strings.IndexAny(rest, "/?#"); i >= 0 {
+		rest = rest[:i]
+	}
+
+	// Drop userinfo ("user@" or "user:password@")
+	if i := strings.LastIndex(rest, "@"); i >= 0 {
+		rest = rest[i+1:]
+	}
+
+	// Bracketed IPv6 literal: "[::1]:8080" -> "::1"
+	if strings.HasPrefix(rest, "[") {
+		if end := strings.Index(rest, "]"); end > 0 {
+			return rest[1:end]
+		}
+		return ""
+	}
+
+	// Drop the port or, for scp-like addresses, the path after the colon
+	if i := strings.Index(rest, ":"); i >= 0 {
+		rest = rest[:i]
+	}
+
+	return rest
+}
+
+// shallowClone performs a depth-1 clone of a single ref into cloneDir.
+//
+// version is handed to git as the value of --branch. credentialHelper holds
+// extra environment variables that are appended to the current process
+// environment for the git invocation. On failure the combined git output is
+// included in the returned error.
+func (g *GitFetcher) shallowClone(ctx context.Context, repoURL, version, cloneDir string, credentialHelper map[string]string) error {
+	// "--" ends option parsing so a repoURL starting with "-" cannot be
+	// interpreted by git as a command-line option.
+	cmd := exec.CommandContext(ctx, "git", "clone", "--depth", "1", "--branch", version, "--", repoURL, cloneDir)
+	cmd.Env = append(os.Environ(), g.envMapToSlice(credentialHelper)...)
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("shallow clone failed: %w (output: %s)", err, string(output))
+	}
+
+	return nil
+}
+
+// fullClone performs a full clone of the repository into cloneDir.
+//
+// credentialHelper holds extra environment variables that are appended to the
+// current process environment for the git invocation. On failure the combined
+// git output is included in the returned error.
+func (g *GitFetcher) fullClone(ctx context.Context, repoURL, cloneDir string, credentialHelper map[string]string) error {
+	// "--" ends option parsing so a repoURL starting with "-" cannot be
+	// interpreted by git as a command-line option.
+	cmd := exec.CommandContext(ctx, "git", "clone", "--", repoURL, cloneDir)
+	cmd.Env = append(os.Environ(), g.envMapToSlice(credentialHelper)...)
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("full clone failed: %w (output: %s)", err, string(output))
+	}
+
+	return nil
+}
+
+// checkout checks out a specific version (tag, branch, or commit) in repoDir.
+//
+// A version starting with "-" is rejected before git is run, because git would
+// parse it as an option instead of a revision. On failure the combined git
+// output is included in the returned error.
+func (g *GitFetcher) checkout(ctx context.Context, repoDir, version string) error {
+	if strings.HasPrefix(version, "-") {
+		return fmt.Errorf("checkout failed: invalid version %q", version)
+	}
+
+	cmd := exec.CommandContext(ctx, "git", "checkout", version)
+	cmd.Dir = repoDir
+	cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("checkout failed: %w (output: %s)", err, string(output))
+	}
+
+	return nil
+}
+
+// envMapToSlice converts an environment map to a slice of "KEY=value" entries.
+// The order of the entries follows map iteration and is therefore unspecified.
+func (g *GitFetcher) envMapToSlice(envMap map[string]string) []string {
+	env := make([]string, 0, len(envMap))
+	for k, v := range envMap {
+		env = append(env, fmt.Sprintf("%s=%s", k, v))
+	}
+	return env
+}
+
+// Cleanup removes the given temporary directories. A failure to remove a path
+// is logged as a warning and does not stop the remaining paths from being
+// processed.
+func (g *GitFetcher) Cleanup(paths ...string) {
+	for _, path := range paths {
+		if err := os.RemoveAll(path); err != nil {
+			log.Warn().Err(err).Str("path", path).Msg("Failed to cleanup temporary directory")
+		}
+	}
+}
diff --git a/pkg/vcs/module.go b/pkg/vcs/module.go
new file mode 100644
index 0000000..fb20af9
--- /dev/null
+++ b/pkg/vcs/module.go
@@ -0,0 +1,252 @@
+package vcs
+
+import (
+	"archive/zip"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog/log"
+)
+
+// ModuleBuilder builds Go module artifacts from source
+type ModuleBuilder struct{}
+
+// NewModuleBuilder creates a new module builder
+func NewModuleBuilder() *ModuleBuilder {
+	return &ModuleBuilder{}
+}
+
+// ModuleInfo represents Go module version metadata (.info file)
+type ModuleInfo struct {
+	Version string    `json:"Version"`
+	Time    time.Time `json:"Time"`
+}
+
+// BuildModuleZip creates a Go module zip from source directory
+// Follows the Go module zip format specification: https://go.dev/ref/mod#zip-files
+//
+// Every collected file is stored under the "<modulePath>@<version>/" prefix,
+// in sorted order. The archive is assembled fully in memory and returned as a
+// reader over that buffer. ctx is currently not consulted.
+func (b *ModuleBuilder) BuildModuleZip(ctx context.Context, srcPath, modulePath, version string) (io.ReadCloser, error) {
+	log.Debug().
+		Str("src_path", srcPath).
+		Str("module", modulePath).
+		Str("version", version).
+		Msg("Building module zip")
+
+	// Create in-memory zip
+	var buf bytes.Buffer
+	zipWriter := zip.NewWriter(&buf)
+
+	// Collect all files to include in zip
+	files, err := b.collectFiles(srcPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to collect files: %w", err)
+	}
+
+	// Sort files for deterministic zip
+	sort.Strings(files)
+
+	// Add files to zip with proper prefix
+	prefix := fmt.Sprintf("%s@%s/", modulePath, version)
+	for _, relPath := range files {
+		if err := b.addFileToZip(zipWriter, srcPath, relPath, prefix); err != nil {
+			zipWriter.Close()
+			return nil, fmt.Errorf("failed to add file %s: %w", relPath, err)
+		}
+	}
+
+	if err := zipWriter.Close(); err != nil {
+		return nil, fmt.Errorf("failed to close zip writer: %w", err)
+	}
+
+	log.Debug().
+		Str("module", modulePath).
+		Str("version", version).
+		Int("files", len(files)).
+		Int("size", buf.Len()).
+		Msg("Successfully built module zip")
+
+	return io.NopCloser(bytes.NewReader(buf.Bytes())), nil
+}
+
+// collectFiles walks the source directory and returns the paths, relative to
+// srcPath, of the files to include in the module zip.
+//
+// Directories named ".git" or "vendor" are skipped entirely. Only regular
+// files are collected; symlinks, devices, sockets and similar entries are
+// left out.
+func (b *ModuleBuilder) collectFiles(srcPath string) ([]string, error) {
+	var files []string
+
+	err := filepath.Walk(srcPath, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Skip directories
+		if info.IsDir() {
+			// Skip .git directory
+			if info.Name() == ".git" {
+				return filepath.SkipDir
+			}
+			// Skip vendor directory (per Go module zip spec)
+			if info.Name() == "vendor" {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+
+		// Skip anything that is not a regular file. filepath.Walk does not
+		// follow symlinks, so a link to a directory would otherwise be
+		// collected here and later fail when its contents are copied.
+		if !info.Mode().IsRegular() {
+			return nil
+		}
+
+		// Get relative path
+		relPath, err := filepath.Rel(srcPath, path)
+		if err != nil {
+			return err
+		}
+
+		// Skip hidden files; the root-level .gitignore is the only exception.
+		// NOTE(review): dotfiles in subdirectories (including nested
+		// .gitignore files) are dropped too - confirm this matches the
+		// module zip contents the go command expects.
+		if strings.HasPrefix(filepath.Base(relPath), ".") && relPath != ".gitignore" {
+			return nil
+		}
+
+		// Include file
+		files = append(files, relPath)
+		return nil
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	return files, nil
+}
+
+// addFileToZip adds a single file to the zip archive.
+//
+// relPath is relative to srcPath. The entry name is prefix followed by
+// relPath with forward slashes. The entry is deflate-compressed, carries the
+// file's mode bits and a fixed modification time.
+func (b *ModuleBuilder) addFileToZip(zipWriter *zip.Writer, srcPath, relPath, prefix string) error {
+	// Create zip header
+	header := &zip.FileHeader{
+		Name:   prefix + filepath.ToSlash(relPath),
+		Method: zip.Deflate,
+	}
+
+	// Get file info for permissions
+	fullPath := filepath.Join(srcPath, relPath)
+	info, err := os.Stat(fullPath)
+	if err != nil {
+		return err
+	}
+
+	// Set modification time to a fixed value for deterministic zips
+	// Go uses the timestamp from the version info
+	header.Modified = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
+	header.SetMode(info.Mode())
+
+	// Create file in zip
+	writer, err := zipWriter.CreateHeader(header)
+	if err != nil {
+		return err
+	}
+
+	// Copy file contents
+	file, err := os.Open(fullPath)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	if _, err := io.Copy(writer, file); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// GenerateModInfo creates the .info file (JSON metadata) for a version.
+//
+// The timestamp is the committer date of the latest commit in srcPath. If it
+// cannot be obtained from git, a warning is logged and the current time is
+// used instead. ctx is currently not consulted.
+func (b *ModuleBuilder) GenerateModInfo(ctx context.Context, srcPath, version string) ([]byte, error) {
+	// Get commit timestamp from git
+	timestamp, err := b.getGitCommitTime(srcPath)
+	if err != nil {
+		// Fallback to current time if git info not available
+		log.Warn().Err(err).Msg("Failed to get git commit time, using current time")
+		timestamp = time.Now()
+	}
+
+	info := ModuleInfo{
+		Version: version,
+		Time:    timestamp,
+	}
+
+	data, err := json.Marshal(info)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal module info: %w", err)
+	}
+
+	return data, nil
+}
+
+// getGitCommitTime retrieves the committer date of the latest commit by
+// running "git log -1 --format=%cI" in repoPath and parsing the result as
+// RFC 3339.
+func (b *ModuleBuilder) getGitCommitTime(repoPath string) (time.Time, error) {
+	cmd := exec.Command("git", "log", "-1", "--format=%cI")
+	cmd.Dir = repoPath
+
+	output, err := cmd.Output()
+	if err != nil {
+		return time.Time{}, err
+	}
+
+	// Parse ISO 8601 timestamp
+	timestamp, err := time.Parse(time.RFC3339, strings.TrimSpace(string(output)))
+	if err != nil {
+		return time.Time{}, err
+	}
+
+	return timestamp, nil
+}
+
+// ExtractGoMod returns the content of srcPath/go.mod.
+//
+// An error is returned when the file cannot be read or when it does not
+// contain the text "module " anywhere. ctx is currently not consulted.
+func (b *ModuleBuilder) ExtractGoMod(ctx context.Context, srcPath string) ([]byte, error) {
+	goModPath := filepath.Join(srcPath, "go.mod")
+
+	data, err := os.ReadFile(goModPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read go.mod: %w", err)
+	}
+
+	// Validate go.mod (basic check)
+	if !strings.Contains(string(data), "module ") {
+		return nil, fmt.Errorf("invalid go.mod: missing module directive")
+	}
+
+	return data, nil
+}
+
+// ValidateModule checks that the go.mod in srcPath declares a module path
+// compatible with expectedModulePath.
+//
+// The declared path is accepted when it equals expectedModulePath or when
+// expectedModulePath extends it by further path elements (for example a
+// "/v2" suffix). A mere string prefix that does not end on a path boundary,
+// such as "example.com/foo" for "example.com/foobar", is a mismatch.
+func (b *ModuleBuilder) ValidateModule(ctx context.Context, srcPath, expectedModulePath string) error {
+	// Read go.mod
+	goModData, err := b.ExtractGoMod(ctx, srcPath)
+	if err != nil {
+		return err
+	}
+
+	// Extract module path from go.mod
+	var declaredModulePath string
+	for _, line := range strings.Split(string(goModData), "\n") {
+		fields := strings.Fields(line)
+		if len(fields) >= 2 && fields[0] == "module" {
+			// Only the first field after the keyword is the path: a trailing
+			// "// comment" is ignored and surrounding quotes are removed.
+			declaredModulePath = strings.Trim(fields[1], "\"`")
+			break
+		}
+	}
+
+	if declaredModulePath == "" {
+		return fmt.Errorf("go.mod missing module declaration")
+	}
+
+	// Check if module path matches (allow version suffixes), comparing on
+	// path-element boundaries only
+	if expectedModulePath != declaredModulePath &&
+		!strings.HasPrefix(expectedModulePath, declaredModulePath+"/") {
+		return fmt.Errorf("module path mismatch: expected %s, got %s", expectedModulePath, declaredModulePath)
+	}
+
+	return nil
+}
diff --git a/script/generate-version.sh b/script/generate-version.sh
new file mode 100755
index 0000000..7f90602
--- /dev/null
+++ b/script/generate-version.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+set -e
+
+# generate-version.sh
+# Generates semantic version based on git tags and commits
+#
+# Usage:
+#   ./script/generate-version.sh
+#
+# Environment variables (optional):
+#   VERSION_PREFIX - Prefix for version tags (default: v)
+#   FALLBACK_VERSION - Version to use if no tags found (default: 0.0.0)
+
+VERSION_PREFIX="${VERSION_PREFIX:-v}"
+FALLBACK_VERSION="${FALLBACK_VERSION:-0.0.0}"
+
+# Try to get the latest tag from git describe; one call both tests for a tag
+# and captures it
+if LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null); then
+    # Remove prefix if present (the prefix is quoted so it is matched
+    # literally, not as a glob pattern)
+    VERSION="${LATEST_TAG#"$VERSION_PREFIX"}"
+
+    # Get commits since last tag
+    COMMITS_SINCE_TAG=$(git rev-list "${LATEST_TAG}..HEAD" --count 2>/dev/null || echo "0")
+
+    # Get current commit hash
+    COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
+
+    # If there are commits since the last tag, add pre-release identifier
+    if [ "$COMMITS_SINCE_TAG" != "0" ]; then
+        # Drop any pre-release or build suffix of the tag (e.g. "-rc.1") so
+        # that only MAJOR.MINOR.PATCH reaches the arithmetic below
+        CORE_VERSION="${VERSION%%[-+]*}"
+
+        # Parse the version; missing components default to 0
+        IFS='.' read -r MAJOR MINOR PATCH <<< "$CORE_VERSION"
+        MAJOR="${MAJOR:-0}"
+        MINOR="${MINOR:-0}"
+        PATCH="${PATCH:-0}"
+
+        # Increment patch for next development version
+        NEXT_PATCH=$((PATCH + 1))
+
+        # Generate pre-release version
+        VERSION="${MAJOR}.${MINOR}.${NEXT_PATCH}-dev.${COMMITS_SINCE_TAG}+${COMMIT_HASH}"
+    fi
+else
+    # No tags found, use fallback
+    COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
+    COMMIT_COUNT=$(git rev-list --count HEAD 2>/dev/null || echo "0")
+    VERSION="${FALLBACK_VERSION}-dev.${COMMIT_COUNT}+${COMMIT_HASH}"
+fi
+
+# Check if working directory is dirty
+if [ -n "$(git status --porcelain 2>/dev/null)" ]; then
+    VERSION="${VERSION}-dirty"
+fi
+
+echo "$VERSION"