Files
traefikoidc/middleware_edge_cases_test.go
lukaszraczylo 72e2b682bb fix: eliminate per-request global mutexes in Yaegi hot paths
The v1.0.14 fix replaced one contended sync.RWMutex (RefreshCoordinator.
refreshMutex) with sync.Map. Production showed the same death-spiral
signature recurring ~2 hours later — same shape, different mutex:
65 goroutines stuck on a sync.(*RWMutex).Lock at one address, pod
pinned at 1000m CPU, identical Yaegi runCfg/reflect.Value.Call stack
pattern. The mutex was RefreshCoordinator.attemptsMutex.

Generalising: under Yaegi (interpreted Go for traefik plugins), any
per-request global mutex acquisition is a latent serialization point.
reflect.Value.Call dispatch on a held lock turns a microsecond
critical section into a multi-millisecond one, and on a GOMAXPROCS=1
pod the queue is unbounded.

This commit removes every per-request global mutex on the hot path:

1. RefreshCoordinator.attemptsMutex (sync.RWMutex)
   sessionRefreshAttempts: map -> sync.Map.
   refreshAttemptTracker: all fields atomic (int32, int64 UnixNano,
   cooldownEndNano == 0 as the not-in-cooldown sentinel, replacing
   the inCooldown bool).
   isInCooldown / recordRefreshAttempt / recordRefreshSuccess /
   recordRefreshFailure all become lock-free. Cooldown entry uses
   CompareAndSwapInt64 so only one goroutine logs the transition.

2. RefreshCircuitBreaker.mutex (sync.RWMutex)
   lastFailureTime / lastSuccessTime -> atomic.Int64 UnixNano.
   state and failures already atomic.
   AllowRequest / RecordSuccess / RecordFailure now pure atomic ops.

3. TraefikOidc.firstRequestMutex (sync.Mutex)
   firstRequestReceived bool -> firstRequestStarted int32.
   metadataRefreshStarted bool -> metadataRefreshStartedAtomic int32.
   ServeHTTP bootstrap path uses CompareAndSwapInt32 — fires once,
   zero steady-state cost. Previously the mutex was acquired on
   every non-health request forever.

4. TraefikOidc.metadataRetryMutex (sync.Mutex)
   lastMetadataRetryTime time.Time -> lastMetadataRetryNano int64.
   The 30-second retry throttle is now a CAS on lastMetadataRetryNano.

cleanupStaleEntries iterates via sync.Map.Range; eviction is a
CompareAndDelete by pointer identity so a tracker freshly re-used by
a concurrent caller is not lost.

Empirical evidence (analysis of the v1.0.14 spike by 3 specialist agents,
profiles in /tmp/traefik-spike-1779511683/):
  * mutex profile: 97% delay in sync.(*Mutex).Unlock via
    HTTPHandlerSwitcher -> accesslog -> metrics -> backoff.RetryNotify
  * 65 stuck goroutines at one RWMutex address (0x40022eb648),
    identical Yaegi CFG pointer, all on rc.attemptsMutex via
    recordRefreshAttempt + isInCooldown
  * traffic driver: long-lived in-cluster Go-http-client doing
    ~5.4 req/s POST embeddings via OIDC cookie session → same
    sessionID → contention all funnels to one tracker entry

Yaegi support for sync/atomic confirmed at
github.com/traefik/yaegi@v0.16.1/stdlib/go1_22_sync_atomic.go:
AddInt32/Int64, LoadInt32/Int64, StoreInt32/Int64,
CompareAndSwapInt32/Int64 all exposed via reflect.ValueOf. Yaegi
dispatches each call through reflect.Value.Call to the COMPILED
atomic.* function, which executes a single hardware CAS/LOCK-XADD
instruction. Each atomic op still pays Yaegi dispatch cost but
cannot block — no queueing, no death spiral.

Trade-off acknowledged: v1.0.15 issues ~6-8 atomic/sync.Map ops per
leader-path request vs the 4 mutex ops of v1.0.14. Under low
contention this is a modest CPU bump. Under high contention it's
an unbounded → bounded transformation. Net win.

All tests pass with -race; golangci-lint clean.
2026-05-23 10:47:21 +01:00

403 lines
13 KiB
Go

package traefikoidc
import (
"context"
"net/http"
"net/http/httptest"
"testing"
)
// TestMiddlewareContextCancellation checks that ServeHTTP responds with
// 408 (Request Timeout) or 503 (Service Unavailable) when the incoming
// request's context is already canceled and initComplete is never closed.
func TestMiddlewareContextCancellation(t *testing.T) {
	middleware := &TraefikOidc{
		logger: NewLogger("debug"),
		// Left open on purpose so the middleware keeps waiting on initialization.
		initComplete:   make(chan struct{}),
		sessionManager: createTestSessionManager(t),
		// Both flags are pre-set to 1; presumably this keeps the one-shot
		// bootstrap work from firing during the test (confirm against ServeHTTP).
		firstRequestStarted:          1,
		metadataRefreshStartedAtomic: 1,
	}

	// Cancel the context before the request is ever served.
	canceledCtx, cancelFn := context.WithCancel(context.Background())
	cancelFn()

	request := httptest.NewRequest(http.MethodGet, "/api/test", nil).WithContext(canceledCtx)
	recorder := httptest.NewRecorder()
	middleware.ServeHTTP(recorder, request)

	switch recorder.Code {
	case http.StatusRequestTimeout, http.StatusServiceUnavailable:
		// Either status is an acceptable way to report the canceled wait.
	default:
		t.Errorf("Expected timeout status for canceled context, got %d", recorder.Code)
	}
}
// TestMiddlewareSessionErrorRecovery checks that a request carrying an
// unparseable "_oidc_session" cookie is answered with a redirect (302 or 303)
// once initialization has been signaled as complete.
func TestMiddlewareSessionErrorRecovery(t *testing.T) {
	// The downstream handler does nothing; the test only inspects the
	// middleware's own response.
	noopNext := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})

	middleware := &TraefikOidc{
		next:                         noopNext,
		logger:                       NewLogger("debug"),
		initComplete:                 make(chan struct{}),
		sessionManager:               createTestSessionManager(t),
		firstRequestStarted:          1,
		metadataRefreshStartedAtomic: 1,
		issuerURL:                    "https://provider.example.com",
		redirURLPath:                 "/callback",
		logoutURLPath:                "/logout",
		clientID:                     "test-client",
		audience:                     "test-client",
		authURL:                      "https://provider.example.com/auth",
	}
	// Closing the channel signals that initialization has finished.
	close(middleware.initComplete)

	// Attach a session cookie whose value is deliberately garbage.
	request := httptest.NewRequest(http.MethodGet, "/api/test", nil)
	badCookie := http.Cookie{
		Name:  "_oidc_session",
		Value: "corrupted!!!invalid!!!",
	}
	request.AddCookie(&badCookie)

	recorder := httptest.NewRecorder()
	middleware.ServeHTTP(recorder, request)

	// The corrupted session must be tolerated and lead to a redirect.
	switch recorder.Code {
	case http.StatusFound, http.StatusSeeOther:
		// Redirect issued as expected.
	default:
		t.Errorf("Expected redirect for corrupted session, got %d", recorder.Code)
	}
}
// TestMiddlewareAJAXRequestHandling checks that an unauthenticated request
// marked with "X-Requested-With: XMLHttpRequest" receives 401 Unauthorized
// instead of a redirect.
func TestMiddlewareAJAXRequestHandling(t *testing.T) {
	noopNext := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})

	middleware := &TraefikOidc{
		next:                         noopNext,
		logger:                       NewLogger("debug"),
		initComplete:                 make(chan struct{}),
		sessionManager:               createTestSessionManager(t),
		firstRequestStarted:          1,
		metadataRefreshStartedAtomic: 1,
		issuerURL:                    "https://provider.example.com",
		redirURLPath:                 "/callback",
		logoutURLPath:                "/logout",
		clientID:                     "test-client",
		audience:                     "test-client",
	}
	// Closing the channel signals that initialization has finished.
	close(middleware.initComplete)

	request := httptest.NewRequest(http.MethodGet, "/api/test", nil)
	request.Header.Set("X-Requested-With", "XMLHttpRequest")

	recorder := httptest.NewRecorder()
	middleware.ServeHTTP(recorder, request)

	// No session is attached, so the AJAX caller must be told it is unauthorized.
	if got := recorder.Code; got != http.StatusUnauthorized {
		t.Errorf("Expected 401 for unauthenticated AJAX request, got %d", got)
	}
}
// TestLogoutWorksWithoutOIDCInitialization checks that a GET to the logout
// path yields a 302 with a non-empty Location header even though
// initComplete is never closed. This matters because users must be able to
// clear their session while the OIDC provider is unavailable.
func TestLogoutWorksWithoutOIDCInitialization(t *testing.T) {
	middleware := &TraefikOidc{
		logger: NewLogger("debug"),
		// Intentionally never closed: simulates a provider that is unavailable.
		initComplete:                 make(chan struct{}),
		sessionManager:               createTestSessionManager(t),
		firstRequestStarted:          1,
		metadataRefreshStartedAtomic: 1,
		logoutURLPath:                "/logout",
		postLogoutRedirectURI:        "/",
		forceHTTPS:                   false,
	}

	request := httptest.NewRequest(http.MethodGet, "/logout", nil)
	request.Host = "example.com"

	recorder := httptest.NewRecorder()
	middleware.ServeHTTP(recorder, request)

	// Logout must redirect regardless of initialization state.
	if got := recorder.Code; got != http.StatusFound {
		t.Errorf("Expected redirect (302) for logout, got %d", got)
	}
	if recorder.Header().Get("Location") == "" {
		t.Error("Expected Location header for logout redirect")
	}
}
// TestMiddlewareDomainRestrictions tests domain-based access control
// NOTE: Currently commented out due to complex session setup requirements
// These scenarios are tested indirectly through integration tests
/*
func TestMiddlewareDomainRestrictions(t *testing.T) {
sessionManager := createTestSessionManager(t)
t.Run("allowed_domain_passes", func(t *testing.T) {
oidc := &TraefikOidc{
next: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}),
logger: NewLogger("debug"),
initComplete: make(chan struct{}),
sessionManager: sessionManager,
firstRequestStarted: 1,
metadataRefreshStartedAtomic: 1,
issuerURL: "https://provider.example.com",
redirURLPath: "/callback",
logoutURLPath: "/logout",
clientID: "test-client",
audience: "test-client",
allowedUserDomains: map[string]struct{}{
"example.com": {},
},
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
return map[string]interface{}{"email": "user@example.com"}, nil
},
}
close(oidc.initComplete)
// Create authenticated session
req := httptest.NewRequest("GET", "/api/test", nil)
session, _ := sessionManager.GetSession(req)
session.SetUserIdentifier("user@example.com")
session.SetAuthenticated(true)
session.SetIDToken("dummy-token")
session.Save(req, httptest.NewRecorder())
// Add session cookies to request
rw := httptest.NewRecorder()
session.Save(req, rw)
for _, cookie := range rw.Result().Cookies() {
req.AddCookie(cookie)
}
rw = httptest.NewRecorder()
oidc.ServeHTTP(rw, req)
if rw.Code != http.StatusOK {
t.Errorf("Expected 200 for allowed domain, got %d", rw.Code)
}
})
t.Run("forbidden_domain_blocked", func(t *testing.T) {
oidc := &TraefikOidc{
next: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}),
logger: NewLogger("debug"),
initComplete: make(chan struct{}),
sessionManager: sessionManager,
firstRequestStarted: 1,
metadataRefreshStartedAtomic: 1,
issuerURL: "https://provider.example.com",
redirURLPath: "/callback",
logoutURLPath: "/logout",
clientID: "test-client",
audience: "test-client",
allowedUserDomains: map[string]struct{}{
"example.com": {},
},
}
close(oidc.initComplete)
// Create session with forbidden domain
req := httptest.NewRequest("GET", "/api/test", nil)
session, _ := sessionManager.GetSession(req)
session.SetUserIdentifier("user@forbidden.com")
session.SetAuthenticated(true)
// Save and inject cookies
rw := httptest.NewRecorder()
session.Save(req, rw)
for _, cookie := range rw.Result().Cookies() {
req.AddCookie(cookie)
}
rw = httptest.NewRecorder()
oidc.ServeHTTP(rw, req)
if rw.Code != http.StatusForbidden {
t.Errorf("Expected 403 for forbidden domain, got %d", rw.Code)
}
})
}
*/
// TestMiddlewareOpaqueTokenHandling tests opaque (non-JWT) token handling
// NOTE: Currently commented out due to complex session setup requirements
/*
func TestMiddlewareOpaqueTokenHandling(t *testing.T) {
sessionManager := createTestSessionManager(t)
oidc := &TraefikOidc{
next: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}),
logger: NewLogger("debug"),
initComplete: make(chan struct{}),
sessionManager: sessionManager,
firstRequestStarted: 1,
metadataRefreshStartedAtomic: 1,
issuerURL: "https://provider.example.com",
redirURLPath: "/callback",
logoutURLPath: "/logout",
clientID: "test-client",
audience: "test-client",
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
return map[string]interface{}{"email": "user@example.com"}, nil
},
}
close(oidc.initComplete)
// Create session with opaque token
req := httptest.NewRequest("GET", "/api/test", nil)
session, _ := sessionManager.GetSession(req)
session.SetUserIdentifier("user@example.com")
session.SetAccessToken("sk_live_abcdefghijklmnopqrstuvwxyz") // Opaque token (no dots)
session.SetAuthenticated(true)
// Save and inject cookies
rw := httptest.NewRecorder()
session.Save(req, rw)
for _, cookie := range rw.Result().Cookies() {
req.AddCookie(cookie)
}
rw = httptest.NewRecorder()
oidc.ServeHTTP(rw, req)
// Should process successfully without JWT verification
if rw.Code != http.StatusOK {
t.Errorf("Expected 200 for opaque token, got %d", rw.Code)
}
}
*/
// TestMiddlewareProcessAuthorizedRequestEdgeCases calls
// processAuthorizedRequest directly with hand-built sessions and checks:
//   - an empty user identifier results in a redirect (302/303),
//   - missing ID and access tokens with role checks configured result in a
//     redirect (302/303),
//   - the X-Frame-Options, X-Content-Type-Options and X-XSS-Protection
//     response headers are set,
//   - the X-Forwarded-User and X-Auth-Request-User request headers carry the
//     session's user identifier.
//
// All subtests share one session manager. A failure to obtain a session
// aborts the subtest immediately so it is reported as a clear test failure
// instead of surfacing later as a panic on an unusable session.
func TestMiddlewareProcessAuthorizedRequestEdgeCases(t *testing.T) {
	sessionManager := createTestSessionManager(t)

	t.Run("missing_email_initiates_reauth", func(t *testing.T) {
		oidc := &TraefikOidc{
			next:           http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}),
			logger:         NewLogger("debug"),
			sessionManager: sessionManager,
			redirURLPath:   "/callback",
			logoutURLPath:  "/logout",
			clientID:       "test-client",
			audience:       "test-client",
			authURL:        "https://provider.example.com/auth",
		}

		req := httptest.NewRequest("GET", "/api/test", nil)
		session, err := sessionManager.GetSession(req)
		if err != nil {
			t.Fatalf("GetSession failed: %v", err)
		}
		session.SetUserIdentifier("") // No email
		session.SetIDToken("dummy-token")

		rw := httptest.NewRecorder()
		redirectURL := "https://example.com/callback"
		oidc.processAuthorizedRequest(rw, req, session, redirectURL)

		// Should initiate re-auth
		if rw.Code != http.StatusFound && rw.Code != http.StatusSeeOther {
			t.Errorf("Expected redirect when email is missing, got %d", rw.Code)
		}
	})

	t.Run("missing_token_with_role_checks", func(t *testing.T) {
		oidc := &TraefikOidc{
			next:           http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}),
			logger:         NewLogger("debug"),
			sessionManager: sessionManager,
			redirURLPath:   "/callback",
			logoutURLPath:  "/logout",
			clientID:       "test-client",
			audience:       "test-client",
			authURL:        "https://provider.example.com/auth",
			allowedRolesAndGroups: map[string]struct{}{
				"admin": {},
			},
		}

		req := httptest.NewRequest("GET", "/api/test", nil)
		session, err := sessionManager.GetSession(req)
		if err != nil {
			t.Fatalf("GetSession failed: %v", err)
		}
		session.SetUserIdentifier("user@example.com")
		session.SetIDToken("")     // No ID token
		session.SetAccessToken("") // No access token

		rw := httptest.NewRecorder()
		redirectURL := "https://example.com/callback"
		oidc.processAuthorizedRequest(rw, req, session, redirectURL)

		// Should initiate re-auth when token is missing but role checks required
		if rw.Code != http.StatusFound && rw.Code != http.StatusSeeOther {
			t.Errorf("Expected redirect when token is missing with role checks, got %d", rw.Code)
		}
	})

	t.Run("security_headers_applied", func(t *testing.T) {
		oidc := &TraefikOidc{
			next: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			}),
			logger:         NewLogger("debug"),
			sessionManager: sessionManager,
			extractClaimsFunc: func(token string) (map[string]interface{}, error) {
				return map[string]interface{}{}, nil
			},
		}

		req := httptest.NewRequest("GET", "/api/test", nil)
		session, err := sessionManager.GetSession(req)
		if err != nil {
			t.Fatalf("GetSession failed: %v", err)
		}
		session.SetUserIdentifier("user@example.com")
		session.SetIDToken("dummy-token")

		rw := httptest.NewRecorder()
		redirectURL := "https://example.com/callback"
		oidc.processAuthorizedRequest(rw, req, session, redirectURL)

		// Verify security headers are set on the response.
		for _, name := range []string{
			"X-Frame-Options",
			"X-Content-Type-Options",
			"X-XSS-Protection",
		} {
			if rw.Header().Get(name) == "" {
				t.Errorf("Expected %s header to be set", name)
			}
		}
	})

	t.Run("authentication_headers_set", func(t *testing.T) {
		oidc := &TraefikOidc{
			next: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			}),
			logger:         NewLogger("debug"),
			sessionManager: sessionManager,
			extractClaimsFunc: func(token string) (map[string]interface{}, error) {
				return map[string]interface{}{}, nil
			},
		}

		req := httptest.NewRequest("GET", "/api/test", nil)
		session, err := sessionManager.GetSession(req)
		if err != nil {
			t.Fatalf("GetSession failed: %v", err)
		}
		testEmail := "user@example.com"
		session.SetUserIdentifier(testEmail)
		session.SetIDToken("dummy-id-token")

		rw := httptest.NewRecorder()
		redirectURL := "https://example.com/callback"
		oidc.processAuthorizedRequest(rw, req, session, redirectURL)

		// Verify authentication headers; they are read back from the request,
		// not from the response.
		for _, name := range []string{"X-Forwarded-User", "X-Auth-Request-User"} {
			if got := req.Header.Get(name); got != testEmail {
				t.Errorf("Expected %s=%s, got %s", name, testEmail, got)
			}
		}
		// Token header may not be set in all scenarios, just verify it's not causing errors
	})
}