mirror of
https://github.com/lukaszraczylo/traefikoidc.git
synced 2026-06-05 22:44:17 +00:00
9126c74723
* Allow internal IPs for OIDC configuration via extra flag. Addresses issue #97 * Allow for internal IPs in OIDC configuration. Addresses issue #97. * feat: Add allowPrivateIPAddresses config option for internal networks Adds a new configuration option `allowPrivateIPAddresses` that allows OIDC provider URLs to use private IP addresses (10.x.x.x, 172.16-31.x.x, 192.168.x.x). This is useful for internal deployments where Keycloak or other OIDC providers run on private networks without DNS resolution. Security considerations: - Loopback addresses (127.0.0.1, localhost, ::1) remain blocked - Link-local addresses (169.254.x.x) remain blocked - Default is false (secure by default) Fixes #97 * feat: Support non-email user identifiers for Azure AD Add userIdentifierClaim configuration option to support Azure AD users without email addresses. This allows using alternative JWT claims like "sub", "oid", "upn", or "preferred_username" for user identification. - Default behavior uses "email" claim (backward compatible) - Falls back to "sub" claim if configured claim is missing - allowedUsers matches against the configured claim value - allowedUserDomains only applies when using email-based identification Fixes #95 * Race condition on traefik pod startup When the plugin initializes and calls GetMetadataWithRecovery(): 1. Checks cache first (if metadata is cached, returns immediately) 2. Creates a retry executor with startup-optimized settings (10 attempts, 1s delays) 3. Attempts to fetch metadata from the OIDC provider 4. If the fetch fails with a retryable error (connection refused, EOF, TLS/certificate errors, Traefik default cert), it waits and retries 5. 
After 10 attempts or on a non-retryable error, returns the error This allows the plugin to handle the race condition where: - Traefik initializes the plugin before routes are established - Traefik serves its default certificate before loading real ones - The OIDC provider pod isn't fully ready yet Fixes issue #90 * Race condition on traefik pod startup When the plugin initializes and calls GetMetadataWithRecovery(): 1. Checks cache first (if metadata is cached, returns immediately) 2. Creates a retry executor with startup-optimized settings (10 attempts, 1s delays) 3. Attempts to fetch metadata from the OIDC provider 4. If the fetch fails with a retryable error (connection refused, EOF, TLS/certificate errors, Traefik default cert), it waits and retries 5. After 10 attempts or on a non-retryable error, returns the error This allows the plugin to handle the race condition where: - Traefik initializes the plugin before routes are established - Traefik serves its default certificate before loading real ones - The OIDC provider pod isn't fully ready yet Fixes issue #90 * Headers too big and 431 responses Added new option `minimalHeaders` to reduce the size of forwarded headers from the auth middleware to backend services. - When minimalHeaders: false (default): All headers are forwarded as before - X-Forwarded-User (always set) - X-Auth-Request-Redirect - X-Auth-Request-User - X-Auth-Request-Token (the large ID token) - X-User-Groups, X-User-Roles (if configured) - When minimalHeaders: true: Reduces header overhead - X-Forwarded-User (always set) - X-User-Groups, X-User-Roles (still forwarded if configured) - Custom templated headers (still processed) - Skipped: X-Auth-Request-Token, X-Auth-Request-User, X-Auth-Request-Redirect Fixes issues #64 and #86
263 lines
7.9 KiB
Go
263 lines
7.9 KiB
Go
package traefikoidc
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// metadataCacheVersion is the prefix applied to every metadata cache key.
// Bump it whenever the cached representation changes: entries stored under
// the previous prefix are then never looked up again.
const metadataCacheVersion = "v2"
|
|
|
|
// MetadataCache wraps UniversalCache for metadata operations
|
|
type MetadataCache struct {
|
|
cache *UniversalCache
|
|
logger *Logger
|
|
wg *sync.WaitGroup
|
|
}
|
|
|
|
// versionedKey adds version prefix to cache keys
|
|
func (mc *MetadataCache) versionedKey(key string) string {
|
|
return metadataCacheVersion + ":" + key
|
|
}
|
|
|
|
// MetadataCacheEntry is a field-less placeholder type kept for
// compatibility, so that references to the name keep compiling.
type MetadataCacheEntry struct{}
|
|
|
|
// NewMetadataCache creates a new metadata cache
|
|
func NewMetadataCache(wg *sync.WaitGroup) *MetadataCache {
|
|
manager := GetUniversalCacheManager(nil)
|
|
return &MetadataCache{
|
|
cache: manager.GetMetadataCache(),
|
|
logger: manager.logger,
|
|
wg: wg,
|
|
}
|
|
}
|
|
|
|
// NewMetadataCacheWithLogger creates a metadata cache with specific logger
|
|
func NewMetadataCacheWithLogger(wg *sync.WaitGroup, logger *Logger) *MetadataCache {
|
|
manager := GetUniversalCacheManager(logger)
|
|
return &MetadataCache{
|
|
cache: manager.GetMetadataCache(),
|
|
logger: logger,
|
|
wg: wg,
|
|
}
|
|
}
|
|
|
|
// Set stores provider metadata with a TTL
|
|
func (mc *MetadataCache) Set(providerURL string, metadata *ProviderMetadata, ttl time.Duration) error {
|
|
if metadata == nil {
|
|
return fmt.Errorf("metadata cannot be nil")
|
|
}
|
|
|
|
mc.logger.Debugf("MetadataCache: Setting metadata for %s with TTL %v", providerURL, ttl)
|
|
|
|
// Store as JSON for consistency
|
|
data, err := json.Marshal(metadata)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal metadata: %w", err)
|
|
}
|
|
|
|
// Use versioned key to prevent stale data issues
|
|
return mc.cache.Set(mc.versionedKey(providerURL), data, ttl)
|
|
}
|
|
|
|
// Get retrieves provider metadata from cache
|
|
func (mc *MetadataCache) Get(providerURL string) (*ProviderMetadata, bool) {
|
|
// Use versioned key to prevent stale data issues
|
|
value, exists := mc.cache.Get(mc.versionedKey(providerURL))
|
|
if !exists {
|
|
mc.logger.Debugf("MetadataCache: MISS for %s", providerURL)
|
|
return nil, false
|
|
}
|
|
|
|
// Handle different value types
|
|
var data []byte
|
|
switch v := value.(type) {
|
|
case []byte:
|
|
data = v
|
|
case string:
|
|
data = []byte(v)
|
|
default:
|
|
mc.logger.Errorf("MetadataCache: Invalid data type for %s: %T", providerURL, value)
|
|
return nil, false
|
|
}
|
|
|
|
// Debug: log first 100 chars of cached data to diagnose unmarshal issues
|
|
dataPreview := string(data)
|
|
if len(dataPreview) > 100 {
|
|
dataPreview = dataPreview[:100]
|
|
}
|
|
mc.logger.Debugf("MetadataCache: Attempting to unmarshal for %s, data preview: %s", providerURL, dataPreview)
|
|
|
|
var metadata ProviderMetadata
|
|
if err := json.Unmarshal(data, &metadata); err != nil {
|
|
// Graceful degradation: corrupt data is treated as cache miss
|
|
mc.logger.Errorf("MetadataCache: Corrupt data detected for %s: %v (preview: %s) - deleting and treating as miss", providerURL, err, dataPreview)
|
|
|
|
// Delete corrupt entry to prevent repeated errors (use versioned key)
|
|
mc.cache.Delete(mc.versionedKey(providerURL))
|
|
|
|
return nil, false
|
|
}
|
|
|
|
mc.logger.Debugf("MetadataCache: HIT for %s", providerURL)
|
|
return &metadata, true
|
|
}
|
|
|
|
// GetProviderMetadata fetches metadata with automatic caching
|
|
func (mc *MetadataCache) GetProviderMetadata(ctx context.Context, providerURL string, httpClient *http.Client) (*ProviderMetadata, error) {
|
|
// Check cache first
|
|
if metadata, exists := mc.Get(providerURL); exists {
|
|
return metadata, nil
|
|
}
|
|
|
|
// Fetch from provider
|
|
// Ensure no double slashes by trimming trailing slash from provider URL
|
|
metadataURL := strings.TrimRight(providerURL, "/") + "/.well-known/openid-configuration"
|
|
mc.logger.Infof("Fetching provider metadata from: %s", metadataURL)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", metadataURL, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
|
}
|
|
|
|
resp, err := httpClient.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to fetch metadata: %w", err)
|
|
}
|
|
defer func() { _ = resp.Body.Close() }() // Safe to ignore: closing body on defer
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("metadata fetch returned status %d", resp.StatusCode)
|
|
}
|
|
|
|
var metadata ProviderMetadata
|
|
if err := json.NewDecoder(resp.Body).Decode(&metadata); err != nil {
|
|
return nil, fmt.Errorf("failed to decode metadata: %w", err)
|
|
}
|
|
|
|
// Cache for 1 hour by default
|
|
if err := mc.Set(providerURL, &metadata, 1*time.Hour); err != nil {
|
|
mc.logger.Errorf("Failed to cache metadata: %v", err)
|
|
}
|
|
|
|
return &metadata, nil
|
|
}
|
|
|
|
// Clear removes all cached metadata
|
|
func (mc *MetadataCache) Clear() {
|
|
mc.cache.Clear()
|
|
mc.logger.Info("MetadataCache: Cleared all entries")
|
|
}
|
|
|
|
// Close shuts down the cache
|
|
func (mc *MetadataCache) Close() {
|
|
// Cache is managed globally, so we don't close it here
|
|
mc.logger.Debug("MetadataCache: Close called (managed by global cache manager)")
|
|
}
|
|
|
|
// GetMetrics returns cache metrics
|
|
func (mc *MetadataCache) GetMetrics() map[string]interface{} {
|
|
return mc.cache.GetMetrics()
|
|
}
|
|
|
|
// Size returns the number of cached entries
|
|
func (mc *MetadataCache) Size() int {
|
|
return mc.cache.Size()
|
|
}
|
|
|
|
// GetMetadata fetches metadata with HTTP client and logger
|
|
func (mc *MetadataCache) GetMetadata(providerURL string, httpClient *http.Client, logger *Logger) (*ProviderMetadata, error) {
|
|
// Check cache first
|
|
if metadata, exists := mc.Get(providerURL); exists {
|
|
return metadata, nil
|
|
}
|
|
|
|
// Use context with timeout
|
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
|
defer cancel()
|
|
|
|
return mc.GetProviderMetadata(ctx, providerURL, httpClient)
|
|
}
|
|
|
|
// GetMetadataWithRecovery fetches metadata with retry support for startup scenarios.
|
|
// This handles the race condition where Traefik initializes the plugin before the
|
|
// OIDC provider routes are fully established, or before TLS certificates are loaded.
|
|
// Uses aggressive retry settings (10 attempts, 1s intervals) to give the infrastructure
|
|
// time to stabilize during cold starts.
|
|
// See: https://github.com/lukaszraczylo/traefikoidc/issues/90
|
|
func (mc *MetadataCache) GetMetadataWithRecovery(providerURL string, httpClient *http.Client, logger *Logger, errorRecoveryManager *ErrorRecoveryManager) (*ProviderMetadata, error) {
|
|
// Check cache first - if we have valid cached metadata, use it
|
|
if metadata, exists := mc.Get(providerURL); exists {
|
|
return metadata, nil
|
|
}
|
|
|
|
// Create a retry executor with metadata-fetch-specific configuration
|
|
retryConfig := MetadataFetchRetryConfig()
|
|
retryExecutor := NewRetryExecutor(retryConfig, logger)
|
|
|
|
var metadata *ProviderMetadata
|
|
var lastErr error
|
|
|
|
// Use context with overall timeout for the entire retry sequence
|
|
// 10 attempts * ~10s max delay = ~100s worst case, so use 2 minute timeout
|
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
|
defer cancel()
|
|
|
|
err := retryExecutor.ExecuteWithContext(ctx, func() error {
|
|
// Create per-attempt context with shorter timeout
|
|
attemptCtx, attemptCancel := context.WithTimeout(ctx, 15*time.Second)
|
|
defer attemptCancel()
|
|
|
|
var fetchErr error
|
|
metadata, fetchErr = mc.GetProviderMetadata(attemptCtx, providerURL, httpClient)
|
|
if fetchErr != nil {
|
|
lastErr = fetchErr
|
|
if logger != nil {
|
|
logger.Debugf("Metadata fetch attempt failed: %v", fetchErr)
|
|
}
|
|
return fetchErr
|
|
}
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
// Return the last actual error, not the retry wrapper error
|
|
if lastErr != nil {
|
|
return nil, lastErr
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
return metadata, nil
|
|
}
|
|
|
|
// GetStats returns cache statistics for testing
|
|
func (mc *MetadataCache) GetStats() map[string]interface{} {
|
|
return mc.cache.GetMetrics()
|
|
}
|
|
|
|
// CleanupExpired triggers cleanup of expired entries
|
|
func (mc *MetadataCache) CleanupExpired() {
|
|
mc.cache.Cleanup()
|
|
}
|
|
|
|
// Delete removes an entry from the cache
|
|
func (mc *MetadataCache) Delete(key string) {
|
|
mc.cache.Delete(mc.versionedKey(key))
|
|
}
|
|
|
|
// Mutex returns the cache mutex for testing
|
|
func (mc *MetadataCache) Mutex() *sync.RWMutex {
|
|
return &mc.cache.mu
|
|
}
|