mirror of
https://github.com/lukaszraczylo/gohoarder.git
synced 2026-06-05 22:53:53 +00:00
6b037a92b4
- [x] Reorder struct fields across codebase for consistency - [x] Add analytics event handlers and tests - [x] Add authentication API key management handlers and tests - [x] Add pre-warming control handlers and tests - [x] Implement S3 storage backend with tests - [x] Implement SMB/CIFS storage backend with tests - [x] Add CDN middleware tests - [x] Integrate analytics tracking into cache manager - [x] Add S3 and SMB storage initialization in app setup - [x] Add CDN caching to proxy handlers - [x] Remove distributed locking (Redis lock manager) - [x] Remove proxy common package and utilities - [x] Remove standalone HTTP server package - [x] Remove logger middleware - [x] Simplify error handling utilities - [x] Update config with S3 and SMB options - [x] Update cache manager signature to include analytics
378 lines
12 KiB
Go
378 lines
12 KiB
Go
package npm
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/lukaszraczylo/gohoarder/pkg/auth"
|
|
"github.com/lukaszraczylo/gohoarder/pkg/cache"
|
|
"github.com/lukaszraczylo/gohoarder/pkg/errors"
|
|
"github.com/lukaszraczylo/gohoarder/pkg/network"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// Handler implements the NPM registry protocol
|
|
type Handler struct {
|
|
cache *cache.Manager
|
|
client *network.Client
|
|
credExtractor *auth.CredentialExtractor
|
|
credHasher *auth.CredentialHasher
|
|
credValidator *auth.NPMValidator
|
|
validationCache *auth.ValidationCache
|
|
upstream string
|
|
}
|
|
|
|
// Config carries the settings of the NPM proxy handler.
type Config struct {
	// Upstream is the base URL of the upstream NPM registry, scheme included
	// (for example https://registry.npmjs.org). New substitutes that public
	// registry address when the field is left empty.
	Upstream string
}
|
|
|
|
// New creates a new NPM proxy handler
|
|
func New(cacheManager *cache.Manager, client *network.Client, config Config) *Handler {
|
|
if config.Upstream == "" {
|
|
config.Upstream = "https://registry.npmjs.org"
|
|
}
|
|
|
|
return &Handler{
|
|
cache: cacheManager,
|
|
client: client,
|
|
upstream: config.Upstream,
|
|
credExtractor: auth.NewCredentialExtractor(),
|
|
credHasher: auth.NewCredentialHasher(),
|
|
credValidator: auth.NewNPMValidator(),
|
|
validationCache: auth.NewValidationCache(5 * time.Minute),
|
|
}
|
|
}
|
|
|
|
// ServeHTTP handles NPM registry requests
|
|
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|
ctx := r.Context()
|
|
path := strings.TrimPrefix(r.URL.Path, "/npm")
|
|
|
|
log.Debug().Str("path", path).Str("method", r.Method).Msg("NPM proxy request")
|
|
|
|
// Handle different NPM request types
|
|
// Check for tarballs FIRST before special endpoints (tarballs also contain "/-/")
|
|
if isTarballRequest(path) {
|
|
// Package tarball: /@scope/package/-/package-version.tgz
|
|
h.handleTarball(ctx, w, r, path)
|
|
} else if strings.Contains(path, "/-/") {
|
|
// Special NPM endpoints (e.g., /-/ping, /-/user/token)
|
|
h.handleSpecial(ctx, w, r, path)
|
|
} else if isPackageMetadata(path) {
|
|
// Package metadata: /@scope/package or /package
|
|
h.handleMetadata(ctx, w, r, path)
|
|
} else {
|
|
http.Error(w, "Invalid NPM request", http.StatusBadRequest)
|
|
}
|
|
}
|
|
|
|
// handleMetadata handles package metadata requests
|
|
func (h *Handler) handleMetadata(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) {
|
|
url := h.upstream + path
|
|
packageName := extractPackageName(path)
|
|
|
|
entry, err := h.cache.Get(ctx, "npm", packageName, "metadata", func(ctx context.Context) (io.ReadCloser, string, error) {
|
|
body, statusCode, err := h.client.Get(ctx, url, nil)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
if statusCode != http.StatusOK {
|
|
body.Close() // #nosec G104 -- Cleanup, error not critical
|
|
return nil, "", fmt.Errorf("upstream returned status %d", statusCode)
|
|
}
|
|
return body, url, nil
|
|
})
|
|
|
|
if err != nil {
|
|
log.Error().Err(err).Str("url", url).Msg("Failed to fetch package metadata")
|
|
http.Error(w, "Failed to fetch package metadata", http.StatusBadGateway)
|
|
return
|
|
}
|
|
defer entry.Data.Close() // #nosec G104 -- Cleanup, error not critical
|
|
|
|
// Read metadata into memory for URL rewriting
|
|
var buf bytes.Buffer
|
|
if _, err := io.Copy(&buf, entry.Data); err != nil {
|
|
log.Error().Err(err).Msg("Failed to read metadata")
|
|
http.Error(w, "Failed to read metadata", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Parse JSON metadata
|
|
var metadata map[string]interface{}
|
|
if err := json.Unmarshal(buf.Bytes(), &metadata); err != nil {
|
|
log.Error().Err(err).Msg("Failed to parse metadata JSON")
|
|
http.Error(w, "Failed to parse metadata", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Rewrite tarball URLs to point to our proxy
|
|
proxyBaseURL := getProxyBaseURL(r)
|
|
rewriteMetadataURLs(metadata, h.upstream, proxyBaseURL)
|
|
|
|
// Serialize modified metadata
|
|
modifiedJSON, err := json.Marshal(metadata)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to serialize modified metadata")
|
|
http.Error(w, "Failed to serialize metadata", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json; charset=UTF-8")
|
|
_, _ = w.Write(modifiedJSON) // #nosec G104 -- Websocket buffer write
|
|
}
|
|
|
|
// handleTarball handles package tarball requests
|
|
func (h *Handler) handleTarball(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) {
|
|
packageName, version := extractTarballInfo(path)
|
|
|
|
// Extract credentials from request
|
|
credentials := h.credExtractor.Extract(r)
|
|
credHash := h.credHasher.Hash(credentials)
|
|
|
|
// Construct proper upstream URL with /-/ format
|
|
// Format: https://registry.npmjs.org/package/-/package-version.tgz
|
|
tarballFilename := strings.ReplaceAll(packageName, "/", "-") + "-" + version + ".tgz"
|
|
url := fmt.Sprintf("%s/%s/-/%s", h.upstream, packageName, tarballFilename)
|
|
|
|
log.Debug().
|
|
Str("path", path).
|
|
Str("package", packageName).
|
|
Str("version", version).
|
|
Str("upstream_url", url).
|
|
Str("cred_hash", credHash).
|
|
Bool("has_credentials", credentials != "").
|
|
Msg("Handling tarball request")
|
|
|
|
// Try to get from cache first (with credential-aware key)
|
|
entry, err := h.cache.Get(ctx, "npm", packageName, version, func(ctx context.Context) (io.ReadCloser, string, error) {
|
|
// Prepare headers for upstream request
|
|
headers := make(map[string]string)
|
|
if credentials != "" {
|
|
headers["Authorization"] = credentials
|
|
}
|
|
|
|
body, statusCode, err := h.client.Get(ctx, url, headers)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
if statusCode != http.StatusOK {
|
|
body.Close() // #nosec G104 -- Cleanup, error not critical
|
|
return nil, "", fmt.Errorf("upstream returned status %d", statusCode)
|
|
}
|
|
return body, url, nil
|
|
})
|
|
|
|
if err != nil {
|
|
log.Error().Err(err).Str("url", url).Msg("Failed to fetch package tarball")
|
|
|
|
// Check if error is a security violation - return 403 Forbidden
|
|
if ghErr, ok := err.(*errors.Error); ok && ghErr.Code == errors.ErrCodeSecurityViolation {
|
|
http.Error(w, fmt.Sprintf("Package blocked: %s", ghErr.Message), http.StatusForbidden)
|
|
return
|
|
}
|
|
|
|
// All other errors return 502 Bad Gateway (upstream issues)
|
|
http.Error(w, "Failed to fetch package tarball", http.StatusBadGateway)
|
|
return
|
|
}
|
|
defer entry.Data.Close() // #nosec G104 -- Cleanup, error not critical
|
|
|
|
// CRITICAL SECURITY CHECK: If package requires auth, validate credentials
|
|
if entry.Package != nil && entry.Package.RequiresAuth {
|
|
// Check validation cache first
|
|
allowed, cached, reason := h.validationCache.Get(credHash, url)
|
|
if cached {
|
|
if !allowed {
|
|
log.Warn().
|
|
Str("package", packageName).
|
|
Str("version", version).
|
|
Str("reason", reason).
|
|
Msg("Access denied (cached validation)")
|
|
http.Error(w, "Access denied", http.StatusForbidden)
|
|
return
|
|
}
|
|
log.Debug().
|
|
Str("package", packageName).
|
|
Str("version", version).
|
|
Msg("Access granted (cached validation)")
|
|
} else {
|
|
// Validate with upstream
|
|
log.Debug().
|
|
Str("package", packageName).
|
|
Str("version", version).
|
|
Str("provider", entry.Package.AuthProvider).
|
|
Msg("Validating credentials with upstream")
|
|
|
|
allowed, err := h.credValidator.ValidateAccess(ctx, url, credentials)
|
|
if err != nil {
|
|
reason = err.Error()
|
|
}
|
|
|
|
// Cache validation result
|
|
h.validationCache.Set(credHash, url, allowed, reason)
|
|
|
|
if !allowed {
|
|
log.Warn().
|
|
Str("package", packageName).
|
|
Str("version", version).
|
|
Err(err).
|
|
Msg("Access denied by upstream")
|
|
http.Error(w, "Access denied", http.StatusForbidden)
|
|
return
|
|
}
|
|
|
|
log.Debug().
|
|
Str("package", packageName).
|
|
Str("version", version).
|
|
Msg("Access granted by upstream")
|
|
}
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/octet-stream")
|
|
_, _ = io.Copy(w, entry.Data) // #nosec G104 -- HTTP response write
|
|
}
|
|
|
|
// handleSpecial handles special NPM endpoints
|
|
func (h *Handler) handleSpecial(ctx context.Context, w http.ResponseWriter, r *http.Request, path string) {
|
|
url := h.upstream + path
|
|
|
|
// Don't cache special endpoints, proxy directly
|
|
body, statusCode, err := h.client.Get(ctx, url, nil)
|
|
if err != nil {
|
|
log.Error().Err(err).Str("url", url).Msg("Failed to fetch special endpoint")
|
|
http.Error(w, "Failed to fetch from upstream", http.StatusBadGateway)
|
|
return
|
|
}
|
|
defer body.Close() // #nosec G104 -- Cleanup, error not critical
|
|
|
|
w.WriteHeader(statusCode)
|
|
_, _ = io.Copy(w, body) // #nosec G104 -- HTTP response write
|
|
}
|
|
|
|
// isTarballRequest reports whether path names a tarball, i.e. whether it ends
// in ".tgz" or ".tar.gz".
func isTarballRequest(path string) bool {
	for _, ext := range []string{".tgz", ".tar.gz"} {
		if strings.HasSuffix(path, ext) {
			return true
		}
	}
	return false
}
|
|
|
|
// isPackageMetadata checks if the request is for package metadata
|
|
func isPackageMetadata(path string) bool {
|
|
// Package metadata doesn't have file extensions
|
|
return !isTarballRequest(path) && !strings.Contains(path, "/-/")
|
|
}
|
|
|
|
// extractPackageName returns the package name at the start of path.
//
// A single leading slash is ignored. For a scoped package ("@scope/name/...")
// the first two segments are returned joined by "/"; in every other case,
// including a lone "@scope", the first segment is returned.
func extractPackageName(path string) string {
	trimmed := strings.TrimPrefix(path, "/")
	segments := strings.Split(trimmed, "/")

	// Scoped packages span two path segments.
	if strings.HasPrefix(trimmed, "@") && len(segments) >= 2 {
		return segments[0] + "/" + segments[1]
	}

	// strings.Split always yields at least one element, so this is safe.
	return segments[0]
}
|
|
|
|
// extractTarballInfo extracts package name and version from tarball path
|
|
func extractTarballInfo(path string) (string, string) {
|
|
// Format: /@scope/package/-/package-version.tgz
|
|
// or: /package/-/package-version.tgz
|
|
// Also handle: /package/package-version.tgz (fallback)
|
|
|
|
// Try standard format with /-/
|
|
parts := strings.Split(path, "/-/")
|
|
if len(parts) == 2 {
|
|
packageName := extractPackageName(parts[0])
|
|
tarballName := parts[1]
|
|
tarballName = strings.TrimSuffix(tarballName, ".tgz")
|
|
tarballName = strings.TrimSuffix(tarballName, ".tar.gz")
|
|
|
|
// Remove package name prefix to get version
|
|
prefix := strings.ReplaceAll(packageName, "/", "-") + "-"
|
|
version := strings.TrimPrefix(tarballName, prefix)
|
|
|
|
return packageName, version
|
|
}
|
|
|
|
// Fallback: parse path without /-/
|
|
// Format: /package/package-version.tgz or /@scope/package/package-version.tgz
|
|
path = strings.TrimPrefix(path, "/")
|
|
pathParts := strings.Split(path, "/")
|
|
|
|
if len(pathParts) < 2 {
|
|
return "", ""
|
|
}
|
|
|
|
var packageName, tarballName string
|
|
|
|
// Handle scoped packages
|
|
if strings.HasPrefix(pathParts[0], "@") && len(pathParts) >= 3 {
|
|
packageName = pathParts[0] + "/" + pathParts[1]
|
|
tarballName = pathParts[len(pathParts)-1]
|
|
} else {
|
|
packageName = pathParts[0]
|
|
tarballName = pathParts[len(pathParts)-1]
|
|
}
|
|
|
|
tarballName = strings.TrimSuffix(tarballName, ".tgz")
|
|
tarballName = strings.TrimSuffix(tarballName, ".tar.gz")
|
|
|
|
// Remove package name prefix to get version
|
|
prefix := strings.ReplaceAll(packageName, "/", "-") + "-"
|
|
version := strings.TrimPrefix(tarballName, prefix)
|
|
|
|
return packageName, version
|
|
}
|
|
|
|
// getProxyBaseURL builds the externally visible base URL of this proxy from
// the request: "https" when the request carries TLS state and "http"
// otherwise, followed by the request's Host and the "/npm" mount prefix.
func getProxyBaseURL(r *http.Request) string {
	if r.TLS == nil {
		return fmt.Sprintf("%s://%s/npm", "http", r.Host)
	}
	return fmt.Sprintf("%s://%s/npm", "https", r.Host)
}
|
|
|
|
// rewriteMetadataURLs walks decoded JSON metadata in place and points tarball
// URLs at the proxy.
//
// Arrays and objects are traversed recursively. For an object field named
// "tarball" or "dist", a string value has the first occurrence of upstream
// replaced with proxyBaseURL, an object value is traversed, and any other
// value is left untouched. Values that are neither objects nor arrays are
// ignored.
func rewriteMetadataURLs(data interface{}, upstream, proxyBaseURL string) {
	if list, isList := data.([]interface{}); isList {
		for i := range list {
			rewriteMetadataURLs(list[i], upstream, proxyBaseURL)
		}
		return
	}

	obj, isObj := data.(map[string]interface{})
	if !isObj {
		return
	}

	for field, val := range obj {
		if field != "tarball" && field != "dist" {
			// Ordinary field: keep descending.
			rewriteMetadataURLs(val, upstream, proxyBaseURL)
			continue
		}

		switch typed := val.(type) {
		case string:
			// Overwriting an existing key while ranging over the map is safe.
			obj[field] = strings.Replace(typed, upstream, proxyBaseURL, 1)
		case map[string]interface{}:
			// dist object that carries the tarball field
			rewriteMetadataURLs(typed, upstream, proxyBaseURL)
		}
	}
}
|