mirror of
https://github.com/lukaszraczylo/traefikoidc.git
synced 2026-06-06 22:49:43 +00:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 72e2b682bb | |||
| ae4ccaa89d | |||
| 984fd1c08f | |||
| 99bdd23986 | |||
| a548665edb | |||
| 8c5df82dcf | |||
| aa96e9dbee |
@@ -0,0 +1,15 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: lukaszraczylo
|
||||
patreon: # Replace with a single Patreon username
|
||||
open_collective: # Replace with a single Open Collective username
|
||||
ko_fi: # Replace with a single Ko-fi username
|
||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||
liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
|
||||
polar: # Replace with a single Polar username
|
||||
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
|
||||
thanks_dev: # Replace with a single thanks.dev username
|
||||
custom: https://monzo.me/lukaszraczylo
|
||||
@@ -80,3 +80,25 @@ testData:
|
||||
# address: redis:6379
|
||||
# password: urn:k8s:secret:redis:password
|
||||
# cacheMode: hybrid
|
||||
|
||||
# Optional: bearer-token authentication for M2M (machine-to-machine) API
|
||||
# clients. Default off. When enabled, requests presenting
|
||||
# "Authorization: Bearer <jwt>" are validated against the configured OIDC
|
||||
# provider (signature/issuer/audience/exp) and forwarded without creating
|
||||
# a cookie session. The bearer path REJECTS ID tokens, requires a non-
|
||||
# default audience, and never trusts the `email` claim as the identifier.
|
||||
# See docs/BEARER_AUTH.md for the full threat model.
|
||||
#
|
||||
# enableBearerAuth: true # opt-in
|
||||
# audience: https://api.example.com # REQUIRED when bearer is enabled
|
||||
# bearerIdentifierClaim: sub # default; used as X-Forwarded-User. `email` is rejected.
|
||||
# stripAuthorizationHeader: true # default; drops the raw token before forwarding
|
||||
# bearerEmitWWWAuthenticate: true # default; RFC 6750 hint on 401s
|
||||
# bearerOverridesCookie: false # default; cookie wins when both are present
|
||||
# requireTokenIntrospection: false # opt-in; calls RFC 7662 introspection per request
|
||||
# maxTokenAgeSeconds: 86400 # 24h cap on iat (rejects clock-skew/forever tokens)
|
||||
# maxIdentifierLength: 256 # cap on the sanitised principal identifier
|
||||
# bearerFailureThreshold: 20 # consecutive 401s/IP that trip the throttle
|
||||
# bearerFailureWindowSeconds: 60 # rolling window over which 401s are counted
|
||||
# bearerFailurePenaltySeconds: 60 # 429 + Retry-After duration after threshold trips
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ manages sessions, and forwards user identity to downstream services.
|
||||
- [Configuration reference](docs/CONFIGURATION.md) — every parameter
|
||||
- [Provider guide](docs/PROVIDERS.md) — Google, Azure, Auth0, Okta, Keycloak, Cognito, GitLab, GitHub, generic
|
||||
- [Auth0 audience guide](docs/AUTH0_AUDIENCE_GUIDE.md) — custom APIs, opaque tokens, token confusion
|
||||
- [Bearer-token (M2M) auth](docs/BEARER_AUTH.md) — opt-in `Authorization: Bearer` path, threat model
|
||||
- [Redis cache](docs/REDIS.md) — multi-replica deployments
|
||||
- [Dynamic Client Registration](docs/DCR.md) — RFC 7591
|
||||
- [Development](docs/DEVELOPMENT.md) · [Testing](docs/TESTING.md)
|
||||
@@ -171,6 +172,92 @@ Each instance must use a unique `cookiePrefix` **and** `sessionEncryptionKey`,
|
||||
otherwise a session minted by one instance can grant access through another.
|
||||
See [issue #87](https://github.com/lukaszraczylo/traefikoidc/issues/87).
|
||||
|
||||
### Bearer-token (M2M) authentication
|
||||
|
||||
Opt-in path for API clients that present `Authorization: Bearer <jwt>` instead
|
||||
of logging in via the browser flow. Default off. When enabled, the middleware
|
||||
validates the bearer JWT against the configured OIDC provider (signature,
|
||||
issuer, audience, expiry) and forwards the request downstream with the
|
||||
principal headers — no cookie session is created.
|
||||
|
||||
```yaml
|
||||
enableBearerAuth: true
|
||||
audience: https://api.example.com # REQUIRED when bearer is enabled
|
||||
# optional, defaults shown:
|
||||
bearerIdentifierClaim: sub # claim used as X-Forwarded-User
|
||||
stripAuthorizationHeader: true # drop the raw token before forwarding
|
||||
bearerEmitWWWAuthenticate: true # RFC 6750 hint on 401s
|
||||
bearerOverridesCookie: false # cookie wins when both are present (safer)
|
||||
maxTokenAgeSeconds: 86400 # 24h cap on iat
|
||||
bearerFailureThreshold: 20 # consecutive 401s/IP before 429 throttle
|
||||
```
|
||||
|
||||
Hardening built in by default:
|
||||
|
||||
- **Audience required.** Startup fails if `enableBearerAuth=true` and
|
||||
`audience` is unset. Eliminates the "token issued for service B accepted
|
||||
by A" confusion vector.
|
||||
- **ID tokens explicitly rejected.** Bearer is access-token-only. ID tokens
|
||||
(detected via `nonce`, `typ: at+jwt`, `token_use`, `scope`, or audience
|
||||
shape) return `401`.
|
||||
- **`alg` and `kid` pinned at the entrypoint.** Asymmetric-only allowlist
|
||||
(`RS256/384/512`, `PS256/384/512`, `ES256/384/512`); `kid` length and
|
||||
charset capped — both checked **before** any JWKS fetch so attacker noise
|
||||
can't amplify into upstream calls.
|
||||
- **Identifier sanitised.** Default identifier source is `sub`; `email` is
|
||||
rejected unless explicitly opted in (which the middleware still refuses to
|
||||
avoid the unverified-email spoofing footgun). Control characters, bidi-
|
||||
override codepoints, and the delimiters `, ; =` are all rejected before
|
||||
the value reaches `X-Forwarded-User`.
|
||||
- **Multi-audience tokens require `azp`.** When `aud` is an array of more
|
||||
than one element, the token must carry `azp == clientID`.
|
||||
- **`iat` upper-age bound.** Tokens older than `maxTokenAgeSeconds` are
|
||||
rejected even if `exp` is far in the future.
|
||||
- **Per-IP 401 throttle.** After `bearerFailureThreshold` consecutive 401s
|
||||
from one source IP, further bearer requests from that IP are rejected
|
||||
with `429 Too Many Requests` + `Retry-After`.
|
||||
- **Cookie-wins by default.** When both a session cookie and an
|
||||
`Authorization: Bearer` header arrive on the same request, the cookie path
|
||||
runs (safer against browser/extension/proxy bearer injection). Set
|
||||
`bearerOverridesCookie: true` for the AWS/GCP/Kubernetes convention.
|
||||
- **Replay protection preserved.** The bearer path skips the JTI **Set**
|
||||
(so the same token can be reused) but the **Get** stays active —
|
||||
`RevokeToken` still terminates a bearer token immediately.
|
||||
- **Excluded URLs strip Authorization.** When `enableBearerAuth=true`,
|
||||
excluded paths (e.g. `/health`, `/metrics`) get the `Authorization` header
|
||||
removed before forwarding so the token can't leak into public endpoint
|
||||
logs.
|
||||
- **Optional real-time revocation.** Set `requireTokenIntrospection: true`
|
||||
to call RFC 7662 introspection on every cache miss; revoked tokens fail
|
||||
immediately. Introspection endpoint failures return `503` (distinguishes
|
||||
infra outage from credential rejection).
|
||||
|
||||
**Obtaining bearer tokens** — minting is the IdP's job, not the
|
||||
middleware's. The canonical M2M flow is OAuth 2.0 `client_credentials`
|
||||
(RFC 6749 §4.4); Google requires JWT bearer assertion (RFC 7523) instead.
|
||||
Minimal Auth0-shape request:
|
||||
|
||||
```bash
|
||||
curl -s -X POST https://issuer.example.com/oauth/token \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"grant_type": "client_credentials",
|
||||
"client_id": "your-m2m-client-id",
|
||||
"client_secret": "your-m2m-client-secret",
|
||||
"audience": "https://api.example.com",
|
||||
"scope": "api:read api:write"
|
||||
}'
|
||||
```
|
||||
|
||||
The `audience` you request from the IdP **must match** the `audience` you
|
||||
configured on the middleware. Per-provider endpoints, parameter names, and
|
||||
gotchas (Entra v2 endpoint, Cognito Resource Servers, Keycloak audience
|
||||
mappers, Google's opaque-token quirk) are documented in
|
||||
[docs/BEARER_AUTH.md](docs/BEARER_AUTH.md#obtaining-bearer-tokens-from-your-oidc-provider).
|
||||
|
||||
Full threat model, configuration matrix, and follow-up gaps in
|
||||
[docs/BEARER_AUTH.md](docs/BEARER_AUTH.md).
|
||||
|
||||
### SSE and WebSocket endpoints
|
||||
|
||||
Browser clients cannot follow an OIDC `302` redirect on an SSE stream or a
|
||||
@@ -324,6 +411,19 @@ namespaced claims, Cognito regions, GitLab self-hosted) live in
|
||||
|
||||
Set `logLevel: debug` to surface detail.
|
||||
|
||||
## Telemetry
|
||||
|
||||
On first plugin instantiation this middleware sends a single anonymous
|
||||
adoption ping — project name, version, timestamp; no identifiers, no
|
||||
request data, no token contents. Fire-and-forget with a 2-second timeout;
|
||||
cannot block plugin load or panic.
|
||||
|
||||
Local source: [`telemetry.go`](./telemetry.go). Disclosure mirrors
|
||||
**[oss-telemetry — Disabling telemetry](https://github.com/lukaszraczylo/oss-telemetry#disabling-telemetry)**.
|
||||
|
||||
Quick opt-out: set any of `DO_NOT_TRACK=1`, `OSS_TELEMETRY_DISABLED=1`,
|
||||
or `TRAEFIKOIDC_DISABLE_TELEMETRY=1`.
|
||||
|
||||
## License
|
||||
|
||||
See [LICENSE](LICENSE).
|
||||
|
||||
+592
@@ -0,0 +1,592 @@
|
||||
// Package traefikoidc — bearer-token (M2M) authentication path.
|
||||
//
|
||||
// Disabled by default. When enabled via Config.EnableBearerAuth, requests
|
||||
// presenting "Authorization: Bearer <jwt>" are validated against the
|
||||
// configured OIDC provider (signature, issuer, audience, exp, replay-Get)
|
||||
// and the request is forwarded downstream without creating a cookie session.
|
||||
//
|
||||
// Design rules (kept here in code as the single source of truth):
|
||||
// - Access tokens only. ID tokens are rejected via detectTokenType.
|
||||
// - Audience is mandatory (enforced at startup in main.go).
|
||||
// - alg + kid pinned BEFORE JWKS fetch to deny amplification probes.
|
||||
// - iat upper-age cap bounds clock-skew / forever-token abuse.
|
||||
// - Multi-audience tokens require matching azp.
|
||||
// - Per-IP 401 throttle returns 429 + Retry-After after a threshold.
|
||||
// - JTI Set is suppressed (skipReplayMarking) but JTI Get stays — revoked
|
||||
// tokens (RevokeToken adds to blacklist) are still rejected.
|
||||
// - Identifier is read from BearerIdentifierClaim (default "sub"), never
|
||||
// from UserIdentifierClaim, to avoid the unverified-email spoofing path.
|
||||
// - Identifier is sanitized: length cap, control chars, bidi-override,
|
||||
// delimiter chars (, ; =) rejected.
|
||||
// - On excluded URLs the Authorization header is stripped before forwarding.
|
||||
//
|
||||
// See docs/superpowers/specs/2026-05-18-bearer-token-auth-design.md and
|
||||
// docs/BEARER_AUTH.md for the full threat model.
|
||||
package traefikoidc
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
const bearerPrefix = "Bearer "
|
||||
|
||||
// bearerAlgAllowlist is the set of JWS algorithms accepted on the bearer
|
||||
// path. Asymmetric-only — HS* would allow public-key-as-HMAC-secret attacks
|
||||
// if any operator ever rotates a key into the symmetric branch by mistake;
|
||||
// "none" is obvious. Matches the allowlist enforced inside jwt.Verify but is
|
||||
// checked here BEFORE the JWKS fetch so attacker noise can't amplify.
|
||||
var bearerAlgAllowlist = map[string]struct{}{
|
||||
"RS256": {}, "RS384": {}, "RS512": {},
|
||||
"PS256": {}, "PS384": {}, "PS512": {},
|
||||
"ES256": {}, "ES384": {}, "ES512": {},
|
||||
}
|
||||
|
||||
// bearerKidMaxLen caps the JOSE kid header length to keep memory and cache-key
|
||||
// usage bounded against attacker-controlled values.
|
||||
const bearerKidMaxLen = 256
|
||||
|
||||
// validKidChar is the allowlist for kid header characters. Letters, digits,
|
||||
// dot, underscore, hyphen, equals. Intentionally narrow; real-world kid
|
||||
// values are short URL-safe-base64-ish identifiers.
|
||||
func validKidChar(r rune) bool {
|
||||
if r >= 'a' && r <= 'z' {
|
||||
return true
|
||||
}
|
||||
if r >= 'A' && r <= 'Z' {
|
||||
return true
|
||||
}
|
||||
if r >= '0' && r <= '9' {
|
||||
return true
|
||||
}
|
||||
switch r {
|
||||
case '.', '_', '-', '=':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// bearerError categorizes failure modes for the response builder. Categories
|
||||
// map 1:1 to the table in docs/superpowers/specs/2026-05-18-bearer-token-auth-design.md
|
||||
// §9 so behavior is auditable from spec to code.
|
||||
type bearerErrorKind int
|
||||
|
||||
const (
|
||||
bearerErrInvalidRequest bearerErrorKind = iota
|
||||
bearerErrInvalidToken
|
||||
bearerErrTokenInactive
|
||||
bearerErrInvalidIdentifier
|
||||
bearerErrForbidden
|
||||
bearerErrThrottled
|
||||
bearerErrIntrospectionUnavailable
|
||||
)
|
||||
|
||||
type bearerError struct {
|
||||
kind bearerErrorKind
|
||||
reason string
|
||||
}
|
||||
|
||||
func (e *bearerError) Error() string { return e.reason }
|
||||
|
||||
func newBearerError(kind bearerErrorKind, reason string) *bearerError {
|
||||
return &bearerError{kind: kind, reason: reason}
|
||||
}
|
||||
|
||||
// joseHeader is the minimal subset of the JWS protected header we inspect
|
||||
// BEFORE running the full verification pipeline. Lifted out so the alg+kid
|
||||
// pin can run without paying for parseJWT's full claim decode.
|
||||
type joseHeader struct {
|
||||
Alg string `json:"alg"`
|
||||
Kid string `json:"kid"`
|
||||
Typ string `json:"typ"`
|
||||
}
|
||||
|
||||
// parseBearerJOSEHeader decodes the first JWT segment for early alg/kid pinning.
|
||||
// Does not touch the payload or signature — those are the verifier's job.
|
||||
// Returns nil on success; *bearerError on rejection so the handler can map
|
||||
// directly to a status code. The decoded header itself is not surfaced because
|
||||
// callers don't need it (verifyTokenWithOpts re-parses internally).
|
||||
func parseBearerJOSEHeader(token string) *bearerError {
|
||||
dot := strings.IndexByte(token, '.')
|
||||
if dot <= 0 {
|
||||
return newBearerError(bearerErrInvalidToken, "malformed JWT: no header segment")
|
||||
}
|
||||
raw, err := base64.RawURLEncoding.DecodeString(token[:dot])
|
||||
if err != nil {
|
||||
// Some IdPs pad with '='; tolerate by retrying with StdEncoding.
|
||||
raw, err = base64.URLEncoding.DecodeString(token[:dot])
|
||||
if err != nil {
|
||||
return newBearerError(bearerErrInvalidToken, "malformed JWT: header not base64url")
|
||||
}
|
||||
}
|
||||
var hdr joseHeader
|
||||
if err := json.Unmarshal(raw, &hdr); err != nil {
|
||||
return newBearerError(bearerErrInvalidToken, "malformed JWT: header not JSON")
|
||||
}
|
||||
if _, ok := bearerAlgAllowlist[hdr.Alg]; !ok {
|
||||
return newBearerError(bearerErrInvalidToken, fmt.Sprintf("disallowed alg %q on bearer path", hdr.Alg))
|
||||
}
|
||||
if hdr.Kid == "" {
|
||||
return newBearerError(bearerErrInvalidToken, "missing kid header")
|
||||
}
|
||||
if len(hdr.Kid) > bearerKidMaxLen {
|
||||
return newBearerError(bearerErrInvalidToken, "kid header exceeds max length")
|
||||
}
|
||||
for _, r := range hdr.Kid {
|
||||
if !validKidChar(r) {
|
||||
return newBearerError(bearerErrInvalidToken, "kid header contains disallowed characters")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sanitizeBearerIdentifier validates and trims a principal identifier before
|
||||
// it is injected into request headers. Layered defense: net/http will reject
|
||||
// CRLF on the wire too, but rejecting early gives clearer error logs and
|
||||
// prevents bidi-override / delimiter chars that pass net/http's narrower
|
||||
// checks but confuse downstream parsers and admin UIs.
|
||||
func sanitizeBearerIdentifier(raw string, maxLen int) (string, *bearerError) {
|
||||
identifier := strings.TrimSpace(raw)
|
||||
if identifier == "" {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, "identifier claim empty")
|
||||
}
|
||||
if maxLen > 0 && len(identifier) > maxLen {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, "identifier exceeds max length")
|
||||
}
|
||||
for _, r := range identifier {
|
||||
if unicode.IsControl(r) {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, "identifier contains control character")
|
||||
}
|
||||
// Unicode bidi-override range (RTL spoofing of admin UI / SIEM).
|
||||
if (r >= 0x202A && r <= 0x202E) || (r >= 0x2066 && r <= 0x2069) {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, "identifier contains bidi-override character")
|
||||
}
|
||||
if r == ',' || r == ';' || r == '=' {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, "identifier contains delimiter character")
|
||||
}
|
||||
}
|
||||
return identifier, nil
|
||||
}
|
||||
|
||||
// resolveBearerIdentifier picks the principal identifier from claims using
|
||||
// the configured BearerIdentifierClaim (default "sub"). Decoupled from
|
||||
// userIdentifierClaim (cookie path) to avoid the unverified-email spoofing
|
||||
// vector documented in the spec §13.
|
||||
func resolveBearerIdentifier(claims map[string]interface{}, claimName string) (string, *bearerError) {
|
||||
if claimName == "" {
|
||||
claimName = "sub"
|
||||
}
|
||||
raw, ok := claims[claimName]
|
||||
if !ok {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, fmt.Sprintf("missing claim %q", claimName))
|
||||
}
|
||||
str, ok := raw.(string)
|
||||
if !ok {
|
||||
return "", newBearerError(bearerErrInvalidIdentifier, fmt.Sprintf("claim %q not a string", claimName))
|
||||
}
|
||||
return str, nil
|
||||
}
|
||||
|
||||
// enforceMultiAudienceAzp implements the spec hardening: when aud is a
|
||||
// multi-element array, require an azp claim equal to clientID. Single-string
|
||||
// aud is unaffected (existing verifyAudience handles it).
|
||||
func enforceMultiAudienceAzp(claims map[string]interface{}, clientID string) *bearerError {
|
||||
audRaw, ok := claims["aud"]
|
||||
if !ok {
|
||||
return nil // verifyToken already rejects missing aud
|
||||
}
|
||||
arr, ok := audRaw.([]interface{})
|
||||
if !ok {
|
||||
return nil // single-string aud
|
||||
}
|
||||
if len(arr) <= 1 {
|
||||
return nil
|
||||
}
|
||||
azpRaw, ok := claims["azp"]
|
||||
if !ok {
|
||||
return newBearerError(bearerErrInvalidToken, "multi-audience token missing azp")
|
||||
}
|
||||
azp, ok := azpRaw.(string)
|
||||
if !ok || azp == "" {
|
||||
return newBearerError(bearerErrInvalidToken, "multi-audience token has empty/non-string azp")
|
||||
}
|
||||
if azp != clientID {
|
||||
return newBearerError(bearerErrInvalidToken, "multi-audience token azp does not match clientID")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// enforceIatAge implements the spec MaxTokenAgeSeconds bound on iat. Bounds
|
||||
// clock-manipulation / forever-token abuse without rejecting tokens with a
|
||||
// normal iat just because the issuer's clock skews a few seconds.
|
||||
func enforceIatAge(claims map[string]interface{}, maxAge time.Duration) *bearerError {
|
||||
if maxAge <= 0 {
|
||||
return nil
|
||||
}
|
||||
iatRaw, ok := claims["iat"].(float64)
|
||||
if !ok {
|
||||
// jwt.Verify already requires iat; this branch shouldn't be reached.
|
||||
return newBearerError(bearerErrInvalidToken, "missing iat claim")
|
||||
}
|
||||
iat := time.Unix(int64(iatRaw), 0)
|
||||
if time.Since(iat) > maxAge {
|
||||
return newBearerError(bearerErrInvalidToken, "token iat outside age bound")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// hashIdentifierForLog returns a short SHA-256 prefix safe for info-level
|
||||
// logs. Full identifier is only emitted at debug. Satisfies the audit
|
||||
// requirement (trace which principal was rejected) without leaking PII.
|
||||
func hashIdentifierForLog(identifier string) string {
|
||||
if identifier == "" {
|
||||
return "(none)"
|
||||
}
|
||||
sum := sha256.Sum256([]byte(identifier))
|
||||
return hex.EncodeToString(sum[:4]) // 8 hex chars
|
||||
}
|
||||
|
||||
// --- Per-IP failure throttle ---
|
||||
|
||||
// bearerFailureTracker records consecutive bearer-auth 401s per source IP and
|
||||
// parks repeat offenders in a 429 penalty box. Limits offline-guessing-style
|
||||
// attacks and protects the shared rate-limiter / JWKS endpoint from being
|
||||
// burned by a single source.
|
||||
type bearerFailureTracker struct {
|
||||
mu sync.Mutex
|
||||
entries map[string]*bearerFailureEntry
|
||||
// Configuration snapshot. Captured at construction so a hot reconfigure
|
||||
// doesn't race with the per-request paths.
|
||||
threshold int
|
||||
window time.Duration
|
||||
penalty time.Duration
|
||||
}
|
||||
|
||||
type bearerFailureEntry struct {
|
||||
firstFailureAt time.Time
|
||||
penaltyUntil time.Time
|
||||
count int
|
||||
}
|
||||
|
||||
func newBearerFailureTracker(threshold int, window, penalty time.Duration) *bearerFailureTracker {
|
||||
if threshold <= 0 {
|
||||
threshold = 20
|
||||
}
|
||||
if window <= 0 {
|
||||
window = 60 * time.Second
|
||||
}
|
||||
if penalty <= 0 {
|
||||
penalty = 60 * time.Second
|
||||
}
|
||||
return &bearerFailureTracker{
|
||||
entries: make(map[string]*bearerFailureEntry),
|
||||
threshold: threshold,
|
||||
window: window,
|
||||
penalty: penalty,
|
||||
}
|
||||
}
|
||||
|
||||
// blocked reports whether the source IP is currently in the penalty box.
|
||||
// Returns (true, retryAfter) when blocked; (false, 0) when allowed.
|
||||
func (b *bearerFailureTracker) blocked(ip string) (bool, time.Duration) {
|
||||
if b == nil || ip == "" {
|
||||
return false, 0
|
||||
}
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
e, ok := b.entries[ip]
|
||||
if !ok {
|
||||
return false, 0
|
||||
}
|
||||
now := time.Now()
|
||||
if !e.penaltyUntil.IsZero() && now.Before(e.penaltyUntil) {
|
||||
return true, time.Until(e.penaltyUntil)
|
||||
}
|
||||
return false, 0
|
||||
}
|
||||
|
||||
// recordFailure increments the failure counter for the given IP and trips
|
||||
// the penalty box once threshold-within-window is exceeded.
|
||||
func (b *bearerFailureTracker) recordFailure(ip string) {
|
||||
if b == nil || ip == "" {
|
||||
return
|
||||
}
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
now := time.Now()
|
||||
e, ok := b.entries[ip]
|
||||
if !ok || now.Sub(e.firstFailureAt) > b.window {
|
||||
e = &bearerFailureEntry{firstFailureAt: now}
|
||||
b.entries[ip] = e
|
||||
}
|
||||
e.count++
|
||||
if e.count >= b.threshold {
|
||||
e.penaltyUntil = now.Add(b.penalty)
|
||||
}
|
||||
}
|
||||
|
||||
// recordSuccess clears the failure counter for the given IP after a
|
||||
// successful bearer auth.
|
||||
func (b *bearerFailureTracker) recordSuccess(ip string) {
|
||||
if b == nil || ip == "" {
|
||||
return
|
||||
}
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
delete(b.entries, ip)
|
||||
}
|
||||
|
||||
// clientIPForBearer returns the source IP used to key the failure tracker.
|
||||
// Trusts only the request's transport-level RemoteAddr; X-Forwarded-For is
|
||||
// intentionally ignored to avoid attacker-controlled key spoofing. Behind a
|
||||
// trusted reverse proxy where every request shares one IP, the throttle is
|
||||
// still useful (caps attacker churn through that proxy) — operators wanting
|
||||
// per-real-client throttling must terminate at this middleware.
|
||||
func clientIPForBearer(req *http.Request) string {
|
||||
if req == nil {
|
||||
return ""
|
||||
}
|
||||
host, _, err := net.SplitHostPort(req.RemoteAddr)
|
||||
if err != nil {
|
||||
return req.RemoteAddr
|
||||
}
|
||||
return host
|
||||
}
|
||||
|
||||
// --- Bearer auth entrypoint ---
|
||||
|
||||
// detectBearerToken returns (token, true) when the request carries a usable
|
||||
// Authorization: Bearer header. Case-insensitive on the scheme. Returns
|
||||
// ("", false) for any other shape.
|
||||
func detectBearerToken(req *http.Request) (string, bool) {
|
||||
if req == nil {
|
||||
return "", false
|
||||
}
|
||||
h := req.Header.Get("Authorization")
|
||||
if len(h) < len(bearerPrefix) {
|
||||
return "", false
|
||||
}
|
||||
if !strings.EqualFold(h[:len(bearerPrefix)], bearerPrefix) {
|
||||
return "", false
|
||||
}
|
||||
token := strings.TrimSpace(h[len(bearerPrefix):])
|
||||
if token == "" {
|
||||
return "", false
|
||||
}
|
||||
return token, true
|
||||
}
|
||||
|
||||
// hasSessionCookie reports whether the request carries any cookie matching
|
||||
// the session prefix. Used to implement the cookie-wins-by-default
|
||||
// precedence rule when both bearer and cookie are present.
|
||||
func (t *TraefikOidc) hasSessionCookie(req *http.Request) bool {
|
||||
if t.sessionManager == nil {
|
||||
return false
|
||||
}
|
||||
prefix := t.sessionManager.GetCookiePrefix()
|
||||
if prefix == "" {
|
||||
return false
|
||||
}
|
||||
for _, c := range req.Cookies() {
|
||||
if strings.HasPrefix(c.Name, prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// writeBearerError writes the canonical 401/403/429/503 response per spec §9.
|
||||
// Body is always generic; reason is logged at debug only. The
|
||||
// WWW-Authenticate hint is gated by config (default on, RFC 6750 compliant).
|
||||
func (t *TraefikOidc) writeBearerError(rw http.ResponseWriter, req *http.Request, err *bearerError) {
|
||||
var (
|
||||
status int
|
||||
errCode string
|
||||
body string
|
||||
retryAfter time.Duration
|
||||
)
|
||||
switch err.kind {
|
||||
case bearerErrInvalidRequest:
|
||||
status = http.StatusUnauthorized
|
||||
errCode = "invalid_request"
|
||||
body = "Unauthorized"
|
||||
case bearerErrInvalidToken, bearerErrTokenInactive, bearerErrInvalidIdentifier:
|
||||
status = http.StatusUnauthorized
|
||||
errCode = "invalid_token"
|
||||
body = "Unauthorized"
|
||||
case bearerErrForbidden:
|
||||
status = http.StatusForbidden
|
||||
body = "Access denied"
|
||||
case bearerErrThrottled:
|
||||
status = http.StatusTooManyRequests
|
||||
body = "Too Many Requests"
|
||||
retryAfter = t.bearerFailurePenalty
|
||||
case bearerErrIntrospectionUnavailable:
|
||||
status = http.StatusServiceUnavailable
|
||||
body = "Service Unavailable"
|
||||
default:
|
||||
status = http.StatusUnauthorized
|
||||
body = "Unauthorized"
|
||||
}
|
||||
|
||||
if t.bearerEmitWWWAuthenticate && errCode != "" {
|
||||
rw.Header().Set("WWW-Authenticate", fmt.Sprintf(`Bearer error=%q`, errCode))
|
||||
}
|
||||
if retryAfter > 0 {
|
||||
rw.Header().Set("Retry-After", fmt.Sprintf("%d", int(retryAfter.Seconds())))
|
||||
}
|
||||
rw.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
rw.WriteHeader(status)
|
||||
_, _ = rw.Write([]byte(body)) // Safe to ignore: best-effort error body write
|
||||
|
||||
if t.logger != nil {
|
||||
t.logger.Debugf("bearer auth rejected: status=%d category=%v reason=%q path=%s",
|
||||
status, err.kind, err.reason, req.URL.Path)
|
||||
}
|
||||
}
|
||||
|
||||
// handleBearerRequest is the entry point invoked by ServeHTTP when the
|
||||
// EnableBearerAuth flag is set, the request carries an Authorization: Bearer
|
||||
// header, and the (configurable) cookie-precedence rule allows the bearer
|
||||
// path to run.
|
||||
func (t *TraefikOidc) handleBearerRequest(rw http.ResponseWriter, req *http.Request) {
|
||||
ip := clientIPForBearer(req)
|
||||
|
||||
if blocked, retryAfter := t.bearerFailureTracker.blocked(ip); blocked {
|
||||
throttled := newBearerError(bearerErrThrottled, "ip in penalty box")
|
||||
// Preserve the actual retry-after even if it diverged from the
|
||||
// configured default (clock-skew, partial-window expiry).
|
||||
if retryAfter > 0 {
|
||||
rw.Header().Set("Retry-After", fmt.Sprintf("%d", int(retryAfter.Seconds())))
|
||||
}
|
||||
t.writeBearerError(rw, req, throttled)
|
||||
return
|
||||
}
|
||||
|
||||
token, ok := detectBearerToken(req)
|
||||
if !ok {
|
||||
t.bearerFailureTracker.recordFailure(ip)
|
||||
t.writeBearerError(rw, req, newBearerError(bearerErrInvalidRequest, "missing or empty bearer token"))
|
||||
return
|
||||
}
|
||||
if len(token) > AccessTokenConfig.MaxLength {
|
||||
t.bearerFailureTracker.recordFailure(ip)
|
||||
t.writeBearerError(rw, req, newBearerError(bearerErrInvalidToken, "token exceeds max length"))
|
||||
return
|
||||
}
|
||||
if strings.Count(token, ".") != 2 {
|
||||
t.bearerFailureTracker.recordFailure(ip)
|
||||
t.writeBearerError(rw, req, newBearerError(bearerErrInvalidToken, "token is not a 3-segment JWT"))
|
||||
return
|
||||
}
|
||||
|
||||
if bErr := parseBearerJOSEHeader(token); bErr != nil {
|
||||
t.bearerFailureTracker.recordFailure(ip)
|
||||
t.writeBearerError(rw, req, bErr)
|
||||
return
|
||||
}
|
||||
|
||||
p, bErr := t.buildPrincipalFromBearerToken(token)
|
||||
if bErr != nil {
|
||||
t.bearerFailureTracker.recordFailure(ip)
|
||||
t.writeBearerError(rw, req, bErr)
|
||||
return
|
||||
}
|
||||
|
||||
t.bearerFailureTracker.recordSuccess(ip)
|
||||
if t.logger != nil {
|
||||
t.logger.Debugf("bearer auth success: identifier_hash=%s path=%s",
|
||||
hashIdentifierForLog(p.Identifier), req.URL.Path)
|
||||
}
|
||||
t.forwardAuthorized(rw, req, p)
|
||||
}
|
||||
|
||||
// buildPrincipalFromBearerToken runs the full bearer verification pipeline
|
||||
// described in spec §7.3 and returns a principal ready for forwardAuthorized.
|
||||
// Returns a typed *bearerError on failure so the caller can map to status.
|
||||
func (t *TraefikOidc) buildPrincipalFromBearerToken(token string) (*principal, *bearerError) {
|
||||
if err := t.verifyTokenWithOpts(token, verifyOpts{skipReplayMarking: true}); err != nil {
|
||||
return nil, newBearerError(bearerErrInvalidToken, "token verification failed: "+err.Error())
|
||||
}
|
||||
|
||||
parsed, err := parseJWT(token)
|
||||
if err != nil {
|
||||
return nil, newBearerError(bearerErrInvalidToken, "post-verify parseJWT failed: "+err.Error())
|
||||
}
|
||||
claims := parsed.Claims
|
||||
|
||||
// Token-type guard. Reuse the well-tested classifier which already
|
||||
// checks nonce / typ=at+jwt / token_use / scope / aud-vs-clientID.
|
||||
if t.detectTokenType(parsed, token) {
|
||||
return nil, newBearerError(bearerErrInvalidToken, "ID tokens are not accepted on the bearer path")
|
||||
}
|
||||
// Belt-and-braces explicit rejection (cheap, catches edge cases not
|
||||
// covered by detectTokenType's heuristic).
|
||||
if nonce, ok := claims["nonce"].(string); ok && nonce != "" {
|
||||
return nil, newBearerError(bearerErrInvalidToken, "nonce claim present (ID-token shape)")
|
||||
}
|
||||
if tu, ok := claims["token_use"].(string); ok && tu == "id" {
|
||||
return nil, newBearerError(bearerErrInvalidToken, "token_use=id rejected")
|
||||
}
|
||||
|
||||
if bErr := enforceMultiAudienceAzp(claims, t.clientID); bErr != nil {
|
||||
return nil, bErr
|
||||
}
|
||||
if bErr := enforceIatAge(claims, t.maxTokenAge); bErr != nil {
|
||||
return nil, bErr
|
||||
}
|
||||
|
||||
if t.requireTokenIntrospection {
|
||||
if bErr := t.introspectOnBearerPath(token); bErr != nil {
|
||||
return nil, bErr
|
||||
}
|
||||
}
|
||||
|
||||
rawIdentifier, bErr := resolveBearerIdentifier(claims, t.bearerIdentifierClaim)
|
||||
if bErr != nil {
|
||||
return nil, bErr
|
||||
}
|
||||
identifier, bErr := sanitizeBearerIdentifier(rawIdentifier, t.maxIdentifierLength)
|
||||
if bErr != nil {
|
||||
return nil, bErr
|
||||
}
|
||||
|
||||
subject, _ := claims["sub"].(string)
|
||||
clientID, _ := claims["azp"].(string)
|
||||
if clientID == "" {
|
||||
clientID, _ = claims["client_id"].(string)
|
||||
}
|
||||
|
||||
return &principal{
|
||||
Source: sourceBearer,
|
||||
Identifier: identifier,
|
||||
Subject: subject,
|
||||
ClientID: clientID,
|
||||
Claims: claims,
|
||||
AccessToken: token,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// introspectOnBearerPath calls the existing RFC 7662 introspector when the
|
||||
// operator demands real-time revocation. Distinguishes "token revoked" (401)
|
||||
// from "endpoint unavailable" (503) so transient infra failures don't look
|
||||
// like credential failures.
|
||||
func (t *TraefikOidc) introspectOnBearerPath(token string) *bearerError {
|
||||
resp, err := t.introspectToken(token)
|
||||
if err != nil {
|
||||
return newBearerError(bearerErrIntrospectionUnavailable, "introspection failed: "+err.Error())
|
||||
}
|
||||
if !resp.Active {
|
||||
return newBearerError(bearerErrTokenInactive, "introspection reports token inactive")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,812 @@
|
||||
package traefikoidc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
// =============================================================================
|
||||
// Helper builders
|
||||
// =============================================================================
|
||||
|
||||
// makeBearerJWT constructs a JWT with explicit header + claims for tests.
|
||||
// Signature is opaque (b64("signature")) — bearer tests don't exercise the
|
||||
// real cryptographic verifier; verification is bypassed via tokenCache pre-
|
||||
// seed so the bearer pipeline under test sees a "verified" token.
|
||||
func makeBearerJWT(t *testing.T, header, claims map[string]interface{}) string {
|
||||
t.Helper()
|
||||
hb, err := json.Marshal(header)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal header: %v", err)
|
||||
}
|
||||
cb, err := json.Marshal(claims)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal claims: %v", err)
|
||||
}
|
||||
return fmt.Sprintf("%s.%s.%s",
|
||||
base64.RawURLEncoding.EncodeToString(hb),
|
||||
base64.RawURLEncoding.EncodeToString(cb),
|
||||
base64.RawURLEncoding.EncodeToString([]byte("signature")),
|
||||
)
|
||||
}
|
||||
|
||||
// defaultBearerHeader produces the standard RS256+kid header used in tests.
|
||||
func defaultBearerHeader() map[string]interface{} {
|
||||
return map[string]interface{}{"alg": "RS256", "kid": "test-kid"}
|
||||
}
|
||||
|
||||
// defaultBearerClaims produces a baseline access-token claim set. Tests
|
||||
// shallow-clone and override fields as needed.
|
||||
func defaultBearerClaims() map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"iss": "https://issuer.example.com",
|
||||
"aud": "https://api.example.com",
|
||||
"sub": "service-account-1",
|
||||
"scope": "api:read api:write",
|
||||
"exp": float64(time.Now().Add(time.Hour).Unix()),
|
||||
"iat": float64(time.Now().Unix()),
|
||||
}
|
||||
}
|
||||
|
||||
// makeBearerOIDC constructs a TraefikOidc wired for bearer auth tests. The
|
||||
// real verifyTokenWithOpts pipeline is short-circuited via tokenCache pre-
|
||||
// seed: any token Set into t.tokenCache returns nil from VerifyToken,
|
||||
// letting tests exercise the post-verify bearer logic (classifier, identifier,
|
||||
// throttle, header forwarding) without standing up JWKs.
|
||||
func makeBearerOIDC(t *testing.T, next http.Handler) *TraefikOidc {
|
||||
t.Helper()
|
||||
sm := createTestSessionManager(t)
|
||||
oidc := &TraefikOidc{
|
||||
next: next,
|
||||
logger: NewLogger("error"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sm,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://issuer.example.com",
|
||||
audience: "https://api.example.com",
|
||||
clientID: "https://api.example.com",
|
||||
tokenCache: NewTokenCache(),
|
||||
excludedURLs: map[string]struct{}{"/favicon.ico": {}},
|
||||
allowedRolesAndGroups: map[string]struct{}{},
|
||||
limiter: rate.NewLimiter(rate.Every(time.Second), 1000),
|
||||
ctx: context.Background(),
|
||||
enableBearerAuth: true,
|
||||
stripAuthorizationHeader: true,
|
||||
bearerEmitWWWAuthenticate: true,
|
||||
bearerOverridesCookie: false,
|
||||
bearerIdentifierClaim: "sub",
|
||||
maxIdentifierLength: 256,
|
||||
maxTokenAge: 24 * time.Hour,
|
||||
bearerFailureThreshold: 20,
|
||||
bearerFailureWindow: 60 * time.Second,
|
||||
bearerFailurePenalty: 60 * time.Second,
|
||||
bearerFailureTracker: newBearerFailureTracker(20, 60*time.Second, 60*time.Second),
|
||||
}
|
||||
oidc.extractClaimsFunc = extractClaims
|
||||
close(oidc.initComplete)
|
||||
return oidc
|
||||
}
|
||||
|
||||
// seedVerified pre-populates the tokenCache so verifyTokenWithOpts short-
|
||||
// circuits to nil for the given token. Mirrors the production fast-return
|
||||
// path at token_manager.go for previously-verified tokens.
|
||||
func seedVerified(t *testing.T, oidc *TraefikOidc, token string, claims map[string]interface{}) {
|
||||
t.Helper()
|
||||
if oidc.tokenCache == nil {
|
||||
oidc.tokenCache = NewTokenCache()
|
||||
}
|
||||
oidc.tokenCache.Set(token, claims, time.Hour)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Unit tests — small helpers
|
||||
// =============================================================================
|
||||
|
||||
func TestDetectBearerToken(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
header string
|
||||
want string
|
||||
ok bool
|
||||
}{
|
||||
{"missing header", "", "", false},
|
||||
{"basic auth", "Basic abc", "", false},
|
||||
{"bearer with token", "Bearer abc.def.ghi", "abc.def.ghi", true},
|
||||
{"lowercase bearer", "bearer abc.def.ghi", "abc.def.ghi", true},
|
||||
{"mixed case", "BeArEr abc.def.ghi", "abc.def.ghi", true},
|
||||
{"empty token after prefix", "Bearer ", "", false},
|
||||
{"bearer no space", "Bearerabc", "", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/", nil)
|
||||
if tc.header != "" {
|
||||
req.Header.Set("Authorization", tc.header)
|
||||
}
|
||||
got, ok := detectBearerToken(req)
|
||||
if ok != tc.ok || got != tc.want {
|
||||
t.Fatalf("got=(%q, %v), want=(%q, %v)", got, ok, tc.want, tc.ok)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBearerJOSEHeader(t *testing.T) {
|
||||
t.Parallel()
|
||||
mk := func(t *testing.T, h map[string]interface{}) string {
|
||||
return makeBearerJWT(t, h, map[string]interface{}{"sub": "x"})
|
||||
}
|
||||
cases := []struct {
|
||||
header map[string]interface{}
|
||||
name string
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "valid RS256", header: map[string]interface{}{"alg": "RS256", "kid": "k1"}, wantErr: false},
|
||||
{name: "valid ES512", header: map[string]interface{}{"alg": "ES512", "kid": "abc-_.="}, wantErr: false},
|
||||
{name: "alg=none rejected", header: map[string]interface{}{"alg": "none", "kid": "k1"}, wantErr: true},
|
||||
{name: "alg=HS256 rejected", header: map[string]interface{}{"alg": "HS256", "kid": "k1"}, wantErr: true},
|
||||
{name: "missing kid", header: map[string]interface{}{"alg": "RS256"}, wantErr: true},
|
||||
{name: "kid too long", header: map[string]interface{}{"alg": "RS256", "kid": strings.Repeat("a", bearerKidMaxLen+1)}, wantErr: true},
|
||||
{name: "kid bad chars", header: map[string]interface{}{"alg": "RS256", "kid": "evil/../etc/passwd"}, wantErr: true},
|
||||
{name: "kid with space", header: map[string]interface{}{"alg": "RS256", "kid": "key one"}, wantErr: true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
token := mk(t, tc.header)
|
||||
err := parseBearerJOSEHeader(token)
|
||||
if (err != nil) != tc.wantErr {
|
||||
t.Fatalf("err=%v wantErr=%v", err, tc.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitiseBearerIdentifier(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
wantErr bool
|
||||
}{
|
||||
{"normal sub", "service-account-1", "service-account-1", false},
|
||||
{"email-like", "alice@example.com", "alice@example.com", false},
|
||||
{"trim whitespace", " abc ", "abc", false},
|
||||
{"empty", "", "", true},
|
||||
{"only whitespace", " ", "", true},
|
||||
{"control char (newline)", "alice\nbob", "", true},
|
||||
{"control char (CR)", "alice\rbob", "", true},
|
||||
{"control char (NUL)", "alice\x00bob", "", true},
|
||||
{"bidi override", "alice\u202ebob", "", true},
|
||||
{"bidi isolate", "alice\u2066bob", "", true},
|
||||
{"comma delimiter", "alice,bob", "", true},
|
||||
{"semicolon delimiter", "alice;bob", "", true},
|
||||
{"equals delimiter", "alice=bob", "", true},
|
||||
{"over length", strings.Repeat("a", 257), "", true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got, err := sanitizeBearerIdentifier(tc.in, 256)
|
||||
if (err != nil) != tc.wantErr {
|
||||
t.Fatalf("err=%v wantErr=%v", err, tc.wantErr)
|
||||
}
|
||||
if !tc.wantErr && got != tc.want {
|
||||
t.Fatalf("got=%q want=%q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveBearerIdentifier(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
claims map[string]interface{}
|
||||
name string
|
||||
claim string
|
||||
want string
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "default sub", claims: map[string]interface{}{"sub": "abc"}, claim: "", want: "abc"},
|
||||
{name: "explicit sub", claims: map[string]interface{}{"sub": "abc"}, claim: "sub", want: "abc"},
|
||||
{name: "custom client_id claim", claims: map[string]interface{}{"client_id": "svc"}, claim: "client_id", want: "svc"},
|
||||
{name: "missing claim", claims: map[string]interface{}{"other": "x"}, claim: "sub", wantErr: true},
|
||||
{name: "non-string claim", claims: map[string]interface{}{"sub": 123}, claim: "sub", wantErr: true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got, err := resolveBearerIdentifier(tc.claims, tc.claim)
|
||||
if (err != nil) != tc.wantErr {
|
||||
t.Fatalf("err=%v wantErr=%v", err, tc.wantErr)
|
||||
}
|
||||
if !tc.wantErr && got != tc.want {
|
||||
t.Fatalf("got=%q want=%q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnforceMultiAudienceAzp(t *testing.T) {
|
||||
t.Parallel()
|
||||
const cid = "https://api.example.com"
|
||||
cases := []struct {
|
||||
claims map[string]interface{}
|
||||
name string
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "single string aud", claims: map[string]interface{}{"aud": "x"}, wantErr: false},
|
||||
{name: "single element array", claims: map[string]interface{}{"aud": []interface{}{"x"}}, wantErr: false},
|
||||
{name: "multi-aud with matching azp", claims: map[string]interface{}{"aud": []interface{}{"a", "b"}, "azp": cid}, wantErr: false},
|
||||
{name: "multi-aud missing azp", claims: map[string]interface{}{"aud": []interface{}{"a", "b"}}, wantErr: true},
|
||||
{name: "multi-aud empty azp", claims: map[string]interface{}{"aud": []interface{}{"a", "b"}, "azp": ""}, wantErr: true},
|
||||
{name: "multi-aud wrong azp", claims: map[string]interface{}{"aud": []interface{}{"a", "b"}, "azp": "other"}, wantErr: true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
err := enforceMultiAudienceAzp(tc.claims, cid)
|
||||
if (err != nil) != tc.wantErr {
|
||||
t.Fatalf("err=%v wantErr=%v", err, tc.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnforceIatAge(t *testing.T) {
|
||||
t.Parallel()
|
||||
now := time.Now()
|
||||
cases := []struct {
|
||||
name string
|
||||
iat float64
|
||||
maxAge time.Duration
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "fresh", iat: float64(now.Unix()), maxAge: time.Hour, wantErr: false},
|
||||
{name: "23h59m old, max 24h", iat: float64(now.Add(-23*time.Hour - 59*time.Minute).Unix()), maxAge: 24 * time.Hour, wantErr: false},
|
||||
{name: "25h old, max 24h", iat: float64(now.Add(-25 * time.Hour).Unix()), maxAge: 24 * time.Hour, wantErr: true},
|
||||
{name: "1970 token", iat: float64(0), maxAge: 24 * time.Hour, wantErr: true},
|
||||
{name: "maxAge disabled (0)", iat: float64(0), maxAge: 0, wantErr: false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
err := enforceIatAge(map[string]interface{}{"iat": tc.iat}, tc.maxAge)
|
||||
if (err != nil) != tc.wantErr {
|
||||
t.Fatalf("err=%v wantErr=%v", err, tc.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBearerFailureTracker(t *testing.T) {
|
||||
t.Parallel()
|
||||
tr := newBearerFailureTracker(3, 60*time.Second, 60*time.Second)
|
||||
const ip = "10.0.0.1"
|
||||
// Below threshold: not blocked.
|
||||
for i := 0; i < 2; i++ {
|
||||
tr.recordFailure(ip)
|
||||
if b, _ := tr.blocked(ip); b {
|
||||
t.Fatalf("blocked too early after %d failures", i+1)
|
||||
}
|
||||
}
|
||||
// Threshold reached: blocked.
|
||||
tr.recordFailure(ip)
|
||||
if b, retry := tr.blocked(ip); !b || retry <= 0 {
|
||||
t.Fatalf("expected blocked with positive retry, got=%v retry=%v", b, retry)
|
||||
}
|
||||
// Success clears the counter.
|
||||
tr.recordSuccess(ip)
|
||||
if b, _ := tr.blocked(ip); b {
|
||||
t.Fatalf("expected unblocked after success")
|
||||
}
|
||||
// Other IPs are unaffected.
|
||||
if b, _ := tr.blocked("10.0.0.2"); b {
|
||||
t.Fatalf("unrelated IP should not be blocked")
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Integration tests — full ServeHTTP via the bearer pipeline
|
||||
// =============================================================================
|
||||
|
||||
func TestServeHTTP_Bearer_HappyPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
var nextCalled atomic.Bool
|
||||
var capturedHeaders http.Header
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
nextCalled.Store(true)
|
||||
capturedHeaders = r.Header.Clone()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if !nextCalled.Load() {
|
||||
t.Fatalf("expected next handler to run; got status=%d body=%q", rw.Code, rw.Body.String())
|
||||
}
|
||||
if rw.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d, want 200", rw.Code)
|
||||
}
|
||||
if got := capturedHeaders.Get("X-Forwarded-User"); got != "service-account-1" {
|
||||
t.Fatalf("X-Forwarded-User=%q, want service-account-1", got)
|
||||
}
|
||||
if got := capturedHeaders.Get("Authorization"); got != "" {
|
||||
t.Fatalf("Authorization should be stripped, got=%q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_StripAuthDisabled(t *testing.T) {
|
||||
t.Parallel()
|
||||
var capturedAuth string
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
capturedAuth = r.Header.Get("Authorization")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
oidc.stripAuthorizationHeader = false
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if !strings.HasPrefix(capturedAuth, "Bearer ") {
|
||||
t.Fatalf("expected Authorization to be forwarded, got=%q", capturedAuth)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_RejectIDToken(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for ID token rejection")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
// ID-token shape: nonce claim present and no scope. detectTokenType
|
||||
// returns true.
|
||||
claims := map[string]interface{}{
|
||||
"iss": "https://issuer.example.com",
|
||||
"aud": "https://api.example.com",
|
||||
"sub": "user-1",
|
||||
"nonce": "n-0S6_WzA2Mj",
|
||||
"exp": float64(time.Now().Add(time.Hour).Unix()),
|
||||
"iat": float64(time.Now().Unix()),
|
||||
}
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
if wa := rw.Header().Get("WWW-Authenticate"); !strings.Contains(wa, `error="invalid_token"`) {
|
||||
t.Fatalf("expected WWW-Authenticate invalid_token, got=%q", wa)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_AlgNoneRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for alg=none")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
header := map[string]interface{}{"alg": "none", "kid": "k1"}
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, header, claims)
|
||||
// Even if we pre-seeded the cache, the early alg pin runs FIRST.
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_KidTooLongRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for oversized kid")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
header := map[string]interface{}{"alg": "RS256", "kid": strings.Repeat("a", bearerKidMaxLen+1)}
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, header, claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_MultiAudRequiresAzp(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for multi-aud without azp")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
claims["aud"] = []interface{}{"https://api.example.com", "https://other.example.com"}
|
||||
delete(claims, "azp")
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_MultiAudWithAzpAccepted(t *testing.T) {
|
||||
t.Parallel()
|
||||
var nextCalled atomic.Bool
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
nextCalled.Store(true)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
claims["aud"] = []interface{}{"https://api.example.com", "https://other.example.com"}
|
||||
claims["azp"] = oidc.clientID
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusOK || !nextCalled.Load() {
|
||||
t.Fatalf("expected 200 + next called; got status=%d called=%v", rw.Code, nextCalled.Load())
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_IatTooOldRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for old iat")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
claims["iat"] = float64(time.Now().Add(-25 * time.Hour).Unix())
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_IdentifierWithBidiRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for bidi identifier")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
claims["sub"] = "alice\u202ebob"
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_ReplayRegression(t *testing.T) {
|
||||
t.Parallel()
|
||||
var successCount atomic.Int32
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
successCount.Add(1)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
claims["jti"] = "regression-jti"
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
if rw.Code != http.StatusOK {
|
||||
t.Fatalf("iteration %d: status=%d, want 200", i, rw.Code)
|
||||
}
|
||||
}
|
||||
if successCount.Load() != 100 {
|
||||
t.Fatalf("successCount=%d, want 100", successCount.Load())
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_ThrottleTrips429(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run during throttle test")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
oidc.bearerFailureTracker = newBearerFailureTracker(3, 60*time.Second, 60*time.Second)
|
||||
|
||||
// Send malformed bearers from the same RemoteAddr until threshold trips.
|
||||
send := func() *httptest.ResponseRecorder {
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.RemoteAddr = "10.0.0.5:1234"
|
||||
req.Header.Set("Authorization", "Bearer not-a-jwt")
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
return rw
|
||||
}
|
||||
for i := 0; i < 3; i++ {
|
||||
rw := send()
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("pre-throttle iteration %d: status=%d, want 401", i, rw.Code)
|
||||
}
|
||||
}
|
||||
// 4th request: throttled.
|
||||
rw := send()
|
||||
if rw.Code != http.StatusTooManyRequests {
|
||||
t.Fatalf("expected 429 after threshold, got %d", rw.Code)
|
||||
}
|
||||
if ra := rw.Header().Get("Retry-After"); ra == "" {
|
||||
t.Fatalf("expected Retry-After header on 429")
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_ExcludedURLStripsAuth(t *testing.T) {
|
||||
t.Parallel()
|
||||
var capturedAuth string
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
capturedAuth = r.Header.Get("Authorization")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
oidc.excludedURLs = map[string]struct{}{"/favicon.ico": {}}
|
||||
|
||||
req := httptest.NewRequest("GET", "/favicon.ico", nil)
|
||||
req.Header.Set("Authorization", "Bearer abc.def.ghi")
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if rw.Code != http.StatusOK {
|
||||
t.Fatalf("excluded path should pass; got %d", rw.Code)
|
||||
}
|
||||
if capturedAuth != "" {
|
||||
t.Fatalf("Authorization must be stripped on excluded paths, got=%q", capturedAuth)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_RolesGate(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
rolesClaim []interface{}
|
||||
want int
|
||||
}{
|
||||
{name: "matching role", rolesClaim: []interface{}{"admin"}, want: http.StatusOK},
|
||||
{name: "no matching role", rolesClaim: []interface{}{"viewer"}, want: http.StatusForbidden},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
oidc.allowedRolesAndGroups = map[string]struct{}{"admin": {}}
|
||||
oidc.roleClaimName = "roles"
|
||||
claims := defaultBearerClaims()
|
||||
claims["roles"] = tc.rolesClaim
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
if rw.Code != tc.want {
|
||||
t.Fatalf("status=%d, want %d", rw.Code, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_CookieWinsByDefault(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Both cookie and bearer present: cookie path runs (which will redirect
|
||||
// to /authorize since the cookie is empty/unauthenticated).
|
||||
var nextCalled atomic.Bool
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
nextCalled.Store(true)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
prefix := oidc.sessionManager.GetCookiePrefix()
|
||||
req.AddCookie(&http.Cookie{Name: prefix + "main", Value: "irrelevant"})
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
// Cookie path consumed the request; bearer was ignored. Since the
|
||||
// cookie is empty, the cookie path will either 302 to /authorize or
|
||||
// return 401 — in either case, next must NOT be called.
|
||||
if nextCalled.Load() {
|
||||
t.Fatalf("next must not be called when bearer is ignored due to cookie precedence")
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_BearerOverridesCookie(t *testing.T) {
|
||||
t.Parallel()
|
||||
var nextCalled atomic.Bool
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
nextCalled.Store(true)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
oidc.bearerOverridesCookie = true
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
prefix := oidc.sessionManager.GetCookiePrefix()
|
||||
req.AddCookie(&http.Cookie{Name: prefix + "main", Value: "irrelevant"})
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
|
||||
if !nextCalled.Load() || rw.Code != http.StatusOK {
|
||||
t.Fatalf("expected bearer to win with override; status=%d called=%v", rw.Code, nextCalled.Load())
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_OversizedToken(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for oversized token")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
huge := strings.Repeat("a", AccessTokenConfig.MaxLength+1)
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+huge)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_MalformedJWT(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatalf("next must not run for malformed JWT")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer not.jwt") // 1 dot
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
if rw.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("status=%d, want 401", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeHTTP_Bearer_FeatureOffPassesThrough(t *testing.T) {
|
||||
t.Parallel()
|
||||
next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Should not be reached: cookie path runs and (with no session)
|
||||
// will redirect or 401. We assert no panic / next not called.
|
||||
t.Fatalf("next must not run when bearer is off and no valid session exists")
|
||||
})
|
||||
oidc := makeBearerOIDC(t, next)
|
||||
oidc.enableBearerAuth = false
|
||||
claims := defaultBearerClaims()
|
||||
token := makeBearerJWT(t, defaultBearerHeader(), claims)
|
||||
seedVerified(t, oidc, token, claims)
|
||||
req := httptest.NewRequest("GET", "/api/work", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
rw := httptest.NewRecorder()
|
||||
oidc.ServeHTTP(rw, req)
|
||||
// Expect non-200: either 302 to /authorize or 401. The point is the
|
||||
// bearer pipeline didn't run.
|
||||
if rw.Code == http.StatusOK {
|
||||
t.Fatalf("expected non-200 when bearer is off; got %d", rw.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Startup validation tests
|
||||
// =============================================================================
|
||||
|
||||
func TestStartupValidation_BearerRequiresAudience(t *testing.T) {
|
||||
t.Parallel()
|
||||
cfg := CreateConfig()
|
||||
cfg.ProviderURL = "https://issuer.example.com"
|
||||
cfg.ClientID = "id"
|
||||
cfg.ClientSecret = "secret"
|
||||
cfg.CallbackURL = "/oauth/callback"
|
||||
cfg.SessionEncryptionKey = "0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
cfg.EnableBearerAuth = true
|
||||
cfg.Audience = ""
|
||||
_, err := New(context.Background(), http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}), cfg, "bearer-test")
|
||||
if err == nil || !strings.Contains(err.Error(), "requires Audience") {
|
||||
t.Fatalf("expected audience-required error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartupValidation_BearerRejectsEmailIdentifier(t *testing.T) {
|
||||
t.Parallel()
|
||||
cfg := CreateConfig()
|
||||
cfg.ProviderURL = "https://issuer.example.com"
|
||||
cfg.ClientID = "id"
|
||||
cfg.ClientSecret = "secret"
|
||||
cfg.CallbackURL = "/oauth/callback"
|
||||
cfg.SessionEncryptionKey = "0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
cfg.EnableBearerAuth = true
|
||||
cfg.Audience = "https://api.example.com"
|
||||
cfg.BearerIdentifierClaim = "email"
|
||||
_, err := New(context.Background(), http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}), cfg, "bearer-test")
|
||||
if err == nil || !strings.Contains(err.Error(), "bearerIdentifierClaim=\"email\"") {
|
||||
t.Fatalf("expected email-identifier rejection, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Principal invariants
|
||||
// =============================================================================
|
||||
|
||||
func TestBuildPrincipalFromSession_NoIdentifier(t *testing.T) {
|
||||
t.Parallel()
|
||||
oidc := &TraefikOidc{logger: NewLogger("error")}
|
||||
if p := oidc.buildPrincipalFromSession(nil); p != nil {
|
||||
t.Fatalf("nil session must produce nil principal")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,250 @@
|
||||
# Bearer Token (M2M) Authentication
|
||||
|
||||
Opt-in path that lets API clients present `Authorization: Bearer <jwt>` to
|
||||
authenticate without going through the cookie-based OIDC redirect flow.
|
||||
Designed for machine-to-machine (M2M) traffic — services calling other
|
||||
services with tokens minted by your OIDC provider.
|
||||
|
||||
The bearer path lives next to the cookie path: both go through the same
|
||||
post-auth pipeline (`forwardAuthorized`) that injects identity headers,
|
||||
checks `allowedRolesAndGroups`, applies security headers, and forwards to
|
||||
the backend. The only thing that differs is how the principal is established
|
||||
for that single request.
|
||||
|
||||
## Quick start
|
||||
|
||||
```yaml
|
||||
enableBearerAuth: true
|
||||
audience: https://api.example.com # REQUIRED when bearer is enabled
|
||||
clientID: my-api-client-id
|
||||
providerURL: https://issuer.example.com
|
||||
sessionEncryptionKey: <32+-byte secret>
|
||||
callbackURL: /oauth2/callback
|
||||
```
|
||||
|
||||
That is the minimum. Everything else has a secure default.
|
||||
|
||||
## Obtaining bearer tokens from your OIDC provider
|
||||
|
||||
The middleware only **validates** bearer tokens — minting them is the IdP's job. For M2M traffic the canonical mint flow is OAuth 2.0 **`client_credentials`** (RFC 6749 §4.4); some providers require **JWT bearer assertion** (RFC 7523) instead.
|
||||
|
||||
```
|
||||
┌────────────┐ POST /token ┌──────────┐
|
||||
│ client │ ───────────────────────────────►│ IdP │
|
||||
│ (service) │ grant_type=client_credentials │ /token │
|
||||
│ │ client_id=… │ │
|
||||
│ │ client_secret=… (or JWT) │ │
|
||||
│ │ audience=https://api.… ←── critical │
|
||||
│ │ scope=api:read … │
|
||||
│ │ ◄───────────────────────────────│ │
|
||||
│ │ access_token (JWT) │ │
|
||||
└────────────┘ └──────────┘
|
||||
│
|
||||
│ GET /protected
|
||||
│ Authorization: Bearer <access_token>
|
||||
▼
|
||||
Your service (behind Traefik + this plugin)
|
||||
```
|
||||
|
||||
The IdP returns a JWT signed by the same JWKs the middleware already trusts (it discovers them from `providerURL`/.well-known). On the first protected request, the middleware verifies signature + issuer + **audience** + `exp` + identifier claim, then forwards downstream with `X-Forwarded-User` set.
|
||||
|
||||
### Minimal worked example (Auth0-shape)
|
||||
|
||||
```bash
|
||||
# 1. Mint a token
|
||||
curl -s -X POST https://issuer.example.com/oauth/token \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"grant_type": "client_credentials",
|
||||
"client_id": "your-m2m-client-id",
|
||||
"client_secret": "your-m2m-client-secret",
|
||||
"audience": "https://api.example.com",
|
||||
"scope": "api:read api:write"
|
||||
}'
|
||||
# → {"access_token":"eyJhbGciOiJSUzI1NiIs…","token_type":"Bearer","expires_in":86400,…}
|
||||
|
||||
# 2. Use it
|
||||
curl -H 'Authorization: Bearer eyJhbGciOiJSUzI1NiIs…' https://api.example.com/protected
|
||||
```
|
||||
|
||||
The `audience` field in the token request **must match** the `audience` you configured on the middleware. Mismatch → 401 with `Bearer error="invalid_token"`.
|
||||
|
||||
### Per-provider quick reference
|
||||
|
||||
| Provider | Grant | Token endpoint | Audience parameter | Notes |
|
||||
|---|---|---|---|---|
|
||||
| **Auth0** | `client_credentials` | `https://TENANT.auth0.com/oauth/token` | `audience=<your API identifier>` | Register an "API" + "Machine to Machine Application" authorised against that API. Without `audience` you get an opaque /userinfo token, which the bearer path rejects. See `docs/AUTH0_AUDIENCE_GUIDE.md`. |
|
||||
| **Okta** | `client_credentials` | `https://TENANT.okta.com/oauth2/default/v1/token` | Configured in the authorization server; default `aud` is the auth-server URL | Service app must enable the `client_credentials` flow and be granted the requested scopes. |
|
||||
| **Keycloak** | `client_credentials` | `https://kc/realms/REALM/protocol/openid-connect/token` | Configure an "Audience" mapper on a client scope, or use `client_id` as the audience | Client must have `serviceAccountsEnabled: true` plus role mappings. |
|
||||
| **Entra ID / Azure AD** | `client_credentials` (v2.0 endpoint) | `https://login.microsoftonline.com/TENANT/oauth2/v2.0/token` | Pass `scope=<App ID URI>/.default`; `aud` ends up being the API's App ID URI | Requires an App Registration + API permissions + admin consent. **Use the v2.0 endpoint** — v1 issues Microsoft-proprietary access tokens that are opaque to non-Microsoft clients. |
|
||||
| **AWS Cognito** | `client_credentials` | `https://YOUR_DOMAIN.auth.REGION.amazoncognito.com/oauth2/token` | Scopes from a "Resource Server" attached to your User Pool | App client must have `client_credentials` flow enabled. Use HTTP **Basic** auth header for `client_id:client_secret`. |
|
||||
| **GitLab** | `client_credentials` | `https://gitlab.com/oauth/token` | Audience matches the GitLab issuer | Rarely used for protecting external APIs; better suited for GitLab's own resources. |
|
||||
| **Google** | **JWT bearer (RFC 7523)** — *not* `client_credentials` | `https://oauth2.googleapis.com/token` | Signed assertion JWT carries `aud=https://oauth2.googleapis.com/token`; resulting access token is **opaque** unless you specifically request a Google-issued JWT for your API | Google service-account flow is not the best fit for this middleware (opaque tokens are rejected on the bearer path). Run Auth0 / Okta / Keycloak in front, or use ID-token-based flows on the cookie path. |
|
||||
|
||||
### RFC 7523 (JWT bearer assertion) — secretless alternative
|
||||
|
||||
When shared secrets are forbidden (FAPI, internal compliance), swap `client_secret` for a signed JWT assertion:
|
||||
|
||||
```
|
||||
POST /token
|
||||
grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer
|
||||
assertion=<JWT signed by the client's private key>
|
||||
```
|
||||
|
||||
The assertion JWT carries `iss=<client_id>`, `sub=<client_id>`, `aud=<token endpoint>`, `exp`. The IdP verifies the signature against a public key you've pre-registered and returns an access token.
|
||||
|
||||
This middleware already supports JWT assertions on the *middleware → IdP* hop via `clientAuthMethod: private_key_jwt` (see `docs/CONFIGURATION.md`). For the *client → IdP* hop, the same pattern applies — the client signs its own assertion.
|
||||
|
||||
### Operational notes
|
||||
|
||||
- **Token TTL is typically 1–24 hours.** Clients should refresh on `401`, not on a polling timer — saves the IdP.
|
||||
- **Cache and reuse tokens.** The middleware caches verified tokens too, so repeated presentations are cheap. Clients SHOULD reuse a token until ~80 % of `expires_in`.
|
||||
- **JWKS rotation is transparent.** The middleware auto-refreshes its JWKS cache when the IdP rotates keys. Clients don't need to do anything.
|
||||
- **Revocation is generally not per-token** with `client_credentials`. If you need real-time revocation, set `requireTokenIntrospection: true` on the middleware and the IdP is consulted on every cache miss.
|
||||
- **`scope` vs `audience`.** Scope says *what the client may do*; audience says *which service the token is for*. The middleware enforces audience; the backend service should enforce scope.
|
||||
- **Secret hygiene.** Store `client_secret` in a secrets manager (Vault, AWS Secrets Manager, Kubernetes `Secret`). For higher assurance, switch the client to `private_key_jwt` (no shared secret at all).
|
||||
|
||||
### Quickest validation loop
|
||||
|
||||
```bash
|
||||
# 1. Mint
|
||||
TOKEN=$(curl -s -X POST https://issuer.example.com/oauth/token \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"grant_type":"client_credentials","client_id":"…","client_secret":"…","audience":"https://api.example.com"}' \
|
||||
| jq -r .access_token)
|
||||
|
||||
# 2. Inspect claims to confirm aud/iss/exp match the middleware config
|
||||
echo "$TOKEN" | cut -d. -f2 | base64 -d 2>/dev/null | jq
|
||||
|
||||
# 3. Hit the protected route
|
||||
curl -i -H "Authorization: Bearer $TOKEN" https://api.example.com/protected
|
||||
```
|
||||
|
||||
`HTTP/1.1 200` with `X-Forwarded-User` on the backend confirms the loop works end-to-end. `401` with `WWW-Authenticate: Bearer error="invalid_token"` plus a middleware debug log explaining the rejection (audience mismatch, ID token presented, `iat` outside the 24h window, etc.) confirms the hardening is firing as designed.
|
||||
|
||||
## Threat model and design rules
|
||||
|
||||
Bearer authentication has materially different security properties from
|
||||
cookie sessions: no `HttpOnly`/`Secure`/`SameSite` shielding, the token is
|
||||
visible in headers and logs, and it's easier to exfiltrate. The bearer path
|
||||
treats every one of these as a first-class concern.
|
||||
|
||||
| Property | Behaviour | Why |
|
||||
|---|---|---|
|
||||
| Default state | `enableBearerAuth=false` | Bearer is opt-in; existing deployments observe no change. |
|
||||
| Audience | **Mandatory.** Startup fails if `audience` is empty when bearer is enabled. | Eliminates the "token issued for service B accepted by service A" confusion attack. |
|
||||
| Token format | JWT only (3 segments, JOSE-encoded). Opaque tokens are not accepted on the bearer path. | Matches the validation pipeline; opaque tokens require introspection only and bypass JWT-specific defences. |
|
||||
| `alg` allowlist | Hard-pinned asymmetric: `RS256/384/512`, `PS256/384/512`, `ES256/384/512`. Checked **before** any JWKS fetch. | Denies `alg=none` and `alg=HS*` probes; prevents attacker noise from amplifying into JWKS round-trips. |
|
||||
| `kid` hardening | Max 256 bytes; charset `[A-Za-z0-9._\-=]`. Checked **before** JWKS fetch. | Prevents cache-key explosion / pathological-`kid` JWKS amplification. |
|
||||
| Token type | ID tokens are explicitly rejected (`nonce` claim, `typ: at+jwt`, `token_use=id`, scope/aud heuristics — reuses the existing `detectTokenType` helper). | ID tokens are not API credentials; treating them as such is classic token confusion. |
|
||||
| Multi-audience | When `aud` is an array of length > 1, the token must carry `azp == clientID`. | OIDC §2 hardening against tokens minted for one client being replayed by another. |
|
||||
| `iat` upper-age | Rejects tokens older than `maxTokenAgeSeconds` (default 24h). | Bounds clock-manipulation / forever-token abuse, even if `exp` is far in the future. |
|
||||
| Identifier claim | `bearerIdentifierClaim` (default `"sub"`). Resolved value drives `X-Forwarded-User`. | Decoupled from the cookie path's `UserIdentifierClaim` (default `email`) so the M2M flow can never accidentally trust an unverified email. |
|
||||
| Identifier sanitisation | Length cap (`maxIdentifierLength`, default 256). Rejects control chars, Unicode bidi-overrides (U+202A–U+202E, U+2066–U+2069), and the delimiters `, ; =`. | Defence in depth against downstream header injection / log injection / admin-UI spoofing. |
|
||||
| JTI replay marking | Bearer path skips the JTI **Set** (so the same token can be reused until `exp`) but the **Get** stays active. | Allows legitimate bearer reuse without false-positive replay detection; revoked tokens (added to the blacklist by `RevokeToken`) still fail immediately. |
|
||||
| Mixed bearer + cookie | **Cookie wins by default.** Flip to bearer-wins with `bearerOverridesCookie=true`. | Safer against browser/extension/proxy bearer injection scenarios. The cookie is the authoritative authenticator when present. |
|
||||
| `Authorization` strip | `stripAuthorizationHeader=true` by default. | Keeps the raw token out of downstream services and their logs. |
|
||||
| Excluded URLs | `Authorization` is stripped on excluded paths when `enableBearerAuth=true`. | Prevents bearer leakage into public health/metrics endpoint logs and prevents recon via excluded paths. |
|
||||
| Per-IP throttle | After `bearerFailureThreshold` consecutive 401s from one source IP within `bearerFailureWindowSeconds`, further bearer requests from that IP return `429 Too Many Requests` + `Retry-After` for `bearerFailurePenaltySeconds`. | Limits offline-guessing-style attacks and protects the shared rate-limiter / JWKS endpoint. |
|
||||
| Optional introspection | `requireTokenIntrospection=true` calls RFC 7662 introspection on every cache miss. Introspection result is cached briefly. Endpoint failure returns `503` (distinguishes infra outage from credential rejection). | Real-time revocation for high-assurance environments. Adds per-request IdP latency. |
|
||||
| Response shape | `401 Unauthorized` with generic body. `WWW-Authenticate: Bearer error="invalid_token"` per RFC 6750 §3 (toggleable via `bearerEmitWWWAuthenticate`). `403` for roles/groups denial. `429` for throttle. `503` for introspection-endpoint outage. | Auditable from spec to code; reason categories never leak into the response body. |
|
||||
| Logging | Failure reason + identifier hash (SHA-256 truncated to 8 hex chars) logged at debug. Raw tokens are never logged. | Audit trail without secrets-in-logs. |
|
||||
|
||||
## Configuration reference
|
||||
|
||||
| Field | Default | Description |
|
||||
|---|---|---|
|
||||
| `enableBearerAuth` | `false` | Master switch for the bearer path. |
|
||||
| `audience` | (unset) | **Required** when `enableBearerAuth=true`. Reuses the existing global `audience` field. |
|
||||
| `bearerIdentifierClaim` | `"sub"` | JWT claim used as the principal identifier. `"email"` is rejected at startup. |
|
||||
| `stripAuthorizationHeader` | `true` | Remove the `Authorization` header before forwarding to the backend. Disable only when a downstream needs to re-verify the bearer. |
|
||||
| `bearerEmitWWWAuthenticate` | `true` | Include `WWW-Authenticate: Bearer error="..."` on 401 responses (RFC 6750 §3). Disable to reduce recon signal. |
|
||||
| `bearerOverridesCookie` | `false` | Cookie wins when both are present (default). Set `true` for the AWS/GCP/Kubernetes bearer-wins convention. |
|
||||
| `maxTokenAgeSeconds` | `86400` | Upper bound on `iat` claim age (24h). Set `0` to disable the check (not recommended). |
|
||||
| `maxIdentifierLength` | `256` | Length cap for the post-sanitisation identifier. |
|
||||
| `bearerFailureThreshold` | `20` | Consecutive 401s from one IP that trip the throttle. |
|
||||
| `bearerFailureWindowSeconds` | `60` | Rolling window over which 401s are counted. |
|
||||
| `bearerFailurePenaltySeconds` | `60` | Duration of the 429 penalty box after the threshold trips. |
|
||||
| `requireTokenIntrospection` | `false` | Call RFC 7662 introspection on every cache miss. Adds per-request IdP latency. |
|
||||
|
||||
## What the bearer path does NOT do
|
||||
|
||||
- **Human-user / browser flows.** The bearer path is M2M-only in this
|
||||
iteration. Browser SPAs that want to attach a bearer to fetch calls work
|
||||
if your backend treats them as machine clients, but the spec defaults are
|
||||
tuned for service-to-service traffic.
|
||||
- **Opaque access tokens.** Tokens must be JWTs. Introspection is a
|
||||
revocation overlay on top of JWT verification, not a substitute for it.
|
||||
- **`email_verified` enforcement.** The bearer path rejects `email` as the
|
||||
identifier claim at startup precisely because `email_verified` is not
|
||||
enforced in this iteration. Adding human-user bearer support is a
|
||||
follow-up that must include this check.
|
||||
- **mTLS / API keys.** Out of scope. The `principal` abstraction enables
|
||||
adding these later as additional auth methods that produce a principal
|
||||
for the shared `forwardAuthorized` pipeline.
|
||||
- **SSE / WebSocket bypass with bearer.** Bypass paths keep their existing
|
||||
cookie-only behaviour; bearer headers are ignored on those endpoints.
|
||||
Documented limitation; widen by removing the bypass if you need bearer on
|
||||
streaming endpoints.
|
||||
|
||||
## Operational guidance
|
||||
|
||||
- **Always set `strictAudienceValidation: true` when bearer is enabled.**
|
||||
Startup logs a recommendation if you don't.
|
||||
- **Set a tight `maxTokenAgeSeconds`** for environments where tokens are
|
||||
expected to be minted frequently — the default 24h is conservative.
|
||||
- **Enable `requireTokenIntrospection`** if your IdP supports it and
|
||||
revocation latency matters. Bearer-path introspection caches results for
|
||||
a short window per token.
|
||||
- **Monitor 429s.** Sustained 429 traffic indicates either a buggy client
|
||||
loop or an active credential-stuffing attempt. The throttle is your
|
||||
primary signal for both.
|
||||
- **`stripAuthorizationHeader=false` extends the token's blast radius** to
|
||||
every downstream service that sees the request. Treat those services'
|
||||
logs as token stores.
|
||||
- **Bearer reuse is normal.** Don't enable per-token rate limiting; that's
|
||||
what `bearerFailureThreshold` is for (per-IP, not per-token).
|
||||
- **Cookie-wins is the safer default.** Only flip `bearerOverridesCookie`
|
||||
if you control all clients and have audited that none of them present a
|
||||
cookie alongside a bearer they don't intend to authenticate with.
|
||||
|
||||
## Failure response matrix
|
||||
|
||||
| Trigger | Status | Body | `WWW-Authenticate` |
|
||||
|---|---|---|---|
|
||||
| Empty bearer after prefix | 401 | `Unauthorized` | `Bearer error="invalid_request"` |
|
||||
| Token over `MaxLength` | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Not a 3-segment JWT | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Disallowed `alg` (e.g. none, HS*) | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Missing / oversized / bad-charset `kid` | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Signature / issuer / audience / `exp` failure | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| `iat` older than `maxTokenAgeSeconds` | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Multi-audience token without matching `azp` | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Detected as ID token | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| JTI blacklisted (revoked) | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Introspection reports `active=false` | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Introspection endpoint failure | 503 | `Service Unavailable` | (none) |
|
||||
| Identifier claim missing / empty | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Identifier fails sanitisation | 401 | `Unauthorized` | `Bearer error="invalid_token"` |
|
||||
| Per-IP failure threshold tripped | 429 | `Too Many Requests` | (none); `Retry-After: <bearerFailurePenaltySeconds>` |
|
||||
| Roles / groups not allowed | 403 | `Access denied` | (none) |
|
||||
|
||||
## Known follow-ups (deferred)
|
||||
|
||||
These are documented as future work, not blockers:
|
||||
|
||||
- **Human-user bearer with `email_verified` enforcement.** Requires
|
||||
decoupling the email-claim guard from the startup rejection and adding a
|
||||
per-request `email_verified=true` check.
|
||||
- **Introspection respects `client_assertion`.** The existing introspection
|
||||
helper uses `client_secret_basic` only; operators on `private_key_jwt`
|
||||
will see introspection silently use basic auth.
|
||||
- **Per-route bearer configuration.** Single middleware-wide setting in this
|
||||
iteration.
|
||||
|
||||
## References
|
||||
|
||||
- [PR design spec](superpowers/specs/2026-05-18-bearer-token-auth-design.md) — full design rationale, alternatives considered, and per-section sign-off history.
|
||||
- [RFC 6750](https://www.rfc-editor.org/rfc/rfc6750) — Bearer Token Usage.
|
||||
- [RFC 7662](https://www.rfc-editor.org/rfc/rfc7662) — OAuth 2.0 Token Introspection.
|
||||
- [RFC 9068](https://www.rfc-editor.org/rfc/rfc9068) — JWT Profile for OAuth 2.0 Access Tokens.
|
||||
@@ -261,6 +261,26 @@ strictAudienceValidation: true
|
||||
| `disableReplayDetection` | bool | `false` | Disable JTI-based replay attack detection |
|
||||
| `allowPrivateIPAddresses` | bool | `false` | Allow private IPs in provider URLs |
|
||||
|
||||
### Bearer-token (M2M) authentication
|
||||
|
||||
Opt-in path that accepts `Authorization: Bearer <jwt>` instead of the cookie
|
||||
session flow. M2M-only, default off, audience-mandatory. See
|
||||
[docs/BEARER_AUTH.md](BEARER_AUTH.md) for the threat model and operational
|
||||
guidance.
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `enableBearerAuth` | bool | `false` | Master switch. Startup fails if true with empty `audience` or with `bearerIdentifierClaim=email`. |
|
||||
| `bearerIdentifierClaim` | string | `"sub"` | JWT claim used as the principal identifier. `"email"` is rejected at startup. |
|
||||
| `stripAuthorizationHeader` | bool | `true` | Strip `Authorization` from forwarded requests after successful bearer auth. |
|
||||
| `bearerEmitWWWAuthenticate` | bool | `true` | Emit RFC 6750 `WWW-Authenticate: Bearer error="..."` hints on 401. |
|
||||
| `bearerOverridesCookie` | bool | `false` | Cookie wins when both bearer and cookie are present (default). Set true for bearer-wins. |
|
||||
| `maxTokenAgeSeconds` | int64 | `86400` | Upper bound on `iat` claim age (24h). 0 disables the check. |
|
||||
| `maxIdentifierLength` | int | `256` | Length cap on the sanitised principal identifier. |
|
||||
| `bearerFailureThreshold` | int | `20` | Consecutive 401s from one source IP that trip the throttle. |
|
||||
| `bearerFailureWindowSeconds` | int | `60` | Rolling window for counting 401s. |
|
||||
| `bearerFailurePenaltySeconds` | int | `60` | 429 + `Retry-After` duration after the threshold trips. |
|
||||
|
||||
---
|
||||
|
||||
## Session Management
|
||||
|
||||
@@ -0,0 +1,459 @@
|
||||
# Bearer Token Authentication — Design Spec
|
||||
|
||||
- **Date**: 2026-05-18
|
||||
- **Status**: Design — pending implementation plan
|
||||
- **Supersedes**: PR #93 (broken implementation; recommended to close in favour of this design)
|
||||
|
||||
## 1. Summary
|
||||
|
||||
Add an opt-in path that lets API clients (machine-to-machine) authenticate by presenting a signed access token in the `Authorization: Bearer <token>` header, bypassing the cookie-based OIDC redirect flow. Identity, roles, and authorization checks remain consistent with the existing cookie path; the only thing that changes is how the principal is established for that single request.
|
||||
|
||||
The feature is implemented by extracting a shared `forwardAuthorized` pipeline from the existing `processAuthorizedRequest`, introducing a `principal` value type, and adding a small bearer-specific entrypoint that builds a principal directly from a verified JWT — without synthesising a fake `SessionData`.
|
||||
|
||||
## 2. Motivation
|
||||
|
||||
PR #93 attempted this feature by building an in-memory `SessionData` from JWT claims and reusing `processAuthorizedRequest`. The approach has three latent defects:
|
||||
|
||||
1. The synthetic session omits `mainSession.Values["user_identifier"]`. `processAuthorizedRequest` reads it via `GetUserIdentifier()`; when empty it bails to `defaultInitiateAuthentication` and issues an OIDC redirect. The feature is non-functional in practice despite the unit test passing.
|
||||
2. `verifyToken` accepts both ID tokens (audience match against `clientID`) and access tokens. ID tokens are not API credentials; treating them as such is a classic token-confusion vector.
|
||||
3. `verifyToken` adds JTI to the replay blacklist on first verify. Once the verified-token cache evicts, subsequent reuse of the same bearer token triggers a false-positive replay rejection.
|
||||
|
||||
Rather than patch a synthetic-session approach that will keep generating bugs as `SessionData` evolves, this spec replaces it with a cleaner abstraction where session lifecycle and post-auth header injection live in separate units.
|
||||
|
||||
## 3. Goals
|
||||
|
||||
- Accept `Authorization: Bearer <jwt>` from M2M clients, validate the token, and forward the request downstream with identity headers populated.
|
||||
- Enforce the same `allowedRolesAndGroups` policy as the cookie path.
|
||||
- Default-off; safe defaults when enabled (audience required, ID tokens rejected, identifier sanitised).
|
||||
- No behavioural change to the cookie path. Existing tests must continue to pass without modification.
|
||||
|
||||
## 4. Non-Goals
|
||||
|
||||
- Human-user / browser flows. Bearer is M2M-only in this iteration.
|
||||
- Pure opaque access tokens on the bearer path. Tokens must be JWTs; introspection (RFC 7662) is supported *on top of* JWT verification for revocation state, not as a substitute for it.
|
||||
- mTLS, API keys, or any other auth method. The `principal` abstraction enables them later, but they are not delivered here.
|
||||
- Per-route bearer configuration. Single middleware-wide setting.
|
||||
|
||||
## 5. Decided Requirements
|
||||
|
||||
| Topic | Decision |
|
||||
|---|---|
|
||||
| Consumer type | Machine-to-machine (M2M) only |
|
||||
| Token format | JWT only (signature, issuer, audience, exp) |
|
||||
| Audience | Mandatory when feature enabled; startup fails if `Audience == ""` |
|
||||
| Token type | Access tokens only; ID tokens explicitly rejected |
|
||||
| Revocation | JWT-only verification by default; introspection (RFC 7662) opt-in via existing `RequireTokenIntrospection` |
|
||||
| Identity claim | New `BearerIdentifierClaim` config (string, default `"sub"`). Bearer path reads this claim exclusively; does NOT use `UserIdentifierClaim` (which defaults to `"email"` and drives the cookie path). Resolved value must be a non-empty string. `sub` is mandatory per `jwt.go:416` regardless, so even with a different `BearerIdentifierClaim` the token must still carry a valid `sub`. Decoupling avoids the M2M-vs-human-user identity-claim conflict and the email-spoofing footgun. |
|
||||
| Identifier sanitisation | Reject value containing any `unicode.IsControl` char, any Unicode bidi-override (U+202A–U+202E, U+2066–U+2069), leading/trailing whitespace, commas, semicolons, equals signs. Max length 256 bytes. |
|
||||
| Token classifier | **Reuse existing `detectTokenType(jwt, token)` at `token_manager.go:187-303`** which already handles `nonce`, `typ: at+jwt`, `token_use`, `scope`, and aud-vs-clientID priority. Bearer path rejects any token where `detectTokenType == true` (ID token). Do not invent a parallel classifier. |
|
||||
| Algorithm pinning | Hard-pin `alg ∈ {RS256, RS384, RS512, PS256, PS384, PS512, ES256, ES384, ES512}`, enforced **before** JWKS lookup on the bearer path. Prevents wasted JWKS fetches for `alg=none`/HS attacker probes. |
|
||||
| `kid` hardening | `kid` ≤ 256 bytes, charset `[A-Za-z0-9._\-=]`. Reject before JWKS lookup. |
|
||||
| Token age | Bearer path enforces `now - iat <= MaxTokenAgeSeconds` (default 86400 / 24h, configurable). Cookie path unchanged. |
|
||||
| Multi-audience policy | If `aud` is an array (length > 1), require `azp` claim to be present and equal to `clientID`. Single-string `aud` unaffected. |
|
||||
| Mixed bearer + cookie precedence | **Cookie wins by default** when both are presented (safer for browser scenarios). Operator opt-in: `BearerOverridesCookie=true` to flip. Either way, a warning is logged on the request. |
|
||||
| Bearer + excluded URL | `Authorization` header is **stripped** before forwarding when the request hits an excluded URL. Prevents bearer leaking into public endpoints' downstream logs and prevents recon via excluded paths. |
|
||||
| Per-source bearer 401 throttle | New sharded cache `failedBearerAttempts` keyed by client IP. After N (default 20) consecutive 401s from one IP within 1 minute, reject further bearer requests from that IP with 429 for 60s. Applied BEFORE `verifyToken` to deny JWKS amplification. |
|
||||
| `Authorization` header passthrough | New `StripAuthorizationHeader` config, default `true` |
|
||||
| Roles/groups gating | Same `allowedRolesAndGroups` rules as cookie path |
|
||||
| Default state | `EnableBearerAuth` = `false` |
|
||||
| JTI replay marking | Suppressed on bearer path; cookie path unchanged |
|
||||
| Failure response shape | 401 with generic body; `WWW-Authenticate: Bearer error="invalid_token"` per RFC 6750 |
|
||||
| Introspection endpoint outage | 503 (distinguishes infra outage from token rejection) |
|
||||
| Mixed bearer + cookie | Bearer wins; cookie ignored on that request |
|
||||
| SSE/WS bypass + bearer | Bypass paths keep cookie-only check; bearer header ignored on SSE/WS |
|
||||
|
||||
## 6. Architecture
|
||||
|
||||
```
|
||||
┌──────────────────┐
|
||||
HTTP req ──► │ ServeHTTP │ (existing entry; adds bearer detection)
|
||||
└─────────┬────────┘
|
||||
┌───────────┴────────────┐
|
||||
▼ ▼
|
||||
cookie / session bearer (Authorization: Bearer …)
|
||||
│ │
|
||||
▼ ▼
|
||||
┌────────────────┐ ┌────────────────────┐
|
||||
│ buildPrincipal │ │ buildPrincipal │
|
||||
│ FromSession() │ │ FromBearerToken() │
|
||||
└────────┬───────┘ └─────────┬──────────┘
|
||||
│ produces *principal │
|
||||
└──────────────┬───────────┘
|
||||
▼
|
||||
┌────────────────────────────┐
|
||||
│ forwardAuthorized(rw,req,p)│ (shared pipeline)
|
||||
│ • roles/groups gate │
|
||||
│ • header injection │
|
||||
│ • header templates │
|
||||
│ • security headers │
|
||||
│ • cookie stripping │
|
||||
│ • next.ServeHTTP │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
||||
**Invariant**: `forwardAuthorized` never touches session storage. Session-specific concerns (Save, IsDirty, backchannel-logout invalidation) stay inside `processAuthorizedRequest` around the call to `forwardAuthorized`.
|
||||
|
||||
**Feature gate**: when `EnableBearerAuth == false`, the bearer-detection check in `ServeHTTP` is a no-op. Existing deployments observe byte-identical behaviour.
|
||||
|
||||
## 7. Components
|
||||
|
||||
### 7.1 `principal` type (new file `principal.go`)
|
||||
|
||||
```go
|
||||
type principalSource int
|
||||
|
||||
const (
|
||||
sourceSession principalSource = iota
|
||||
sourceBearer
|
||||
)
|
||||
|
||||
type principal struct {
|
||||
Identifier string // drives X-Forwarded-User
|
||||
Email string // optional, "" for M2M
|
||||
Subject string // sub claim
|
||||
ClientID string // azp / client_id, M2M caller
|
||||
Claims map[string]interface{} // raw claims for templates / groups
|
||||
AccessToken string // for X-Auth-Request-Token (gated by minimalHeaders)
|
||||
IDToken string // "" on bearer path
|
||||
RefreshToken string // "" on bearer path
|
||||
Source principalSource
|
||||
}
|
||||
```
|
||||
|
||||
Pure data. No methods that mutate it. No I/O. No manager pointer.
|
||||
|
||||
### 7.2 `buildPrincipalFromSession(*SessionData) *principal` (new in `principal.go`)
|
||||
|
||||
Read-only adapter over existing `SessionData` getters: `GetUserIdentifier`, `GetEmail`, `GetAccessToken`, `GetIDToken`, `GetRefreshToken`, cached claims via `GetIDTokenClaims`. Does not write back to the session. This is the only function that still knows about `SessionData`.
|
||||
|
||||
### 7.3 `buildPrincipalFromBearerToken(token string) (*principal, error)` (new in `bearer_auth.go`)
|
||||
|
||||
1. **Length / format guards**: `len(token) <= AccessTokenConfig.MaxLength`, exactly two dots, non-empty after trim.
|
||||
2. **Parse header for early alg/kid pinning** (without trusting payload): decode JOSE header; reject if `alg` ∉ asymmetric allowlist; reject if `kid` missing, > 256 bytes, or contains chars outside `[A-Za-z0-9._\-=]`. This happens **before** JWKS lookup so attacker noise doesn't amplify into JWKS fetches.
|
||||
3. **Per-IP 401 throttle check**: if this IP is in the `failedBearerAttempts` penalty box, return 429 immediately.
|
||||
4. `t.verifyToken(token, verifyOpts{skipReplayMarking: true})` — reuses signature, issuer, audience, expiration, JTI Get (replay detection). The `skipReplayMarking` flag gates ONLY the JTI Set at `token_manager.go:108-143`; the JTI Get at `token_manager.go:44-47, 80-89` remains active so revoked tokens (via `RevokeToken` adding to blacklist) are still rejected.
|
||||
5. **Re-parse claims** (`parseJWT(token)` is cheap and already done internally; reuse via a single decode if practical).
|
||||
6. **Token-type guard**: call existing `detectTokenType(jwt, token)` (`token_manager.go:187-303`). Reject when it returns `true` (ID token). Belt-and-braces: also reject if `claims["nonce"]` is a non-empty string or `claims["token_use"] == "id"`.
|
||||
7. **Multi-audience hardening**: if `claims["aud"]` is a `[]interface{}` with length > 1, require `claims["azp"]` to be a non-empty string equal to `t.clientID`; reject otherwise.
|
||||
8. **`iat` upper-age bound**: reject when `time.Now().Unix() - int64(claims["iat"].(float64)) > MaxTokenAgeSeconds` (default 86400).
|
||||
9. **Optional introspection**: if `requireTokenIntrospection` is set, call `introspectToken`; reject if `active == false` (401); surface 503 on transport failure. Bearer-path introspection cache TTL is capped at 60s (not 5min) to keep the "real-time revocation" promise close to true.
|
||||
10. **Identifier resolution**: read `t.bearerIdentifierClaim` (defaults to `"sub"`); do NOT use `t.userIdentifierClaim` (cookie path's setting, default `email`). The bearer path does NOT fall back to other claims because `jwt.Verify` already enforces non-empty `sub` (`jwt.go:416-419`). Empty/missing identifier → 401.
|
||||
11. **Identifier sanitisation**: trim, then reject if length > 256 OR contains any of: `unicode.IsControl`, bidi-override (U+202A–U+202E, U+2066–U+2069), `,`, `;`, `=`.
|
||||
12. Return `&principal{ Source: sourceBearer, … }`.
|
||||
|
||||
On any failure path: increment the per-IP `failedBearerAttempts` counter; return the appropriate HTTP status (401 / 403 / 429 / 503) without revealing the failure reason in the response body. Reason is logged at debug only, with the identifier (if resolved) hashed via SHA-256 truncated to 8 hex chars.
|
||||
|
||||
### 7.4 `forwardAuthorized(rw, req, *principal)` (new in `middleware.go`, extracted)
|
||||
|
||||
The shared post-auth pipeline. Lifted verbatim from the existing `processAuthorizedRequest`:
|
||||
|
||||
1. Roles/groups extraction via existing `extractGroupsAndRolesFromClaims`.
|
||||
2. `allowedRolesAndGroups` gate (existing logic).
|
||||
3. Inject `X-Forwarded-User`, `X-User-Groups`, `X-User-Roles`.
|
||||
4. Inject `X-Auth-Request-*` (gated by `minimalHeaders`).
|
||||
5. Header templates.
|
||||
6. Security headers.
|
||||
7. Cookie strip when `stripAuthCookies`.
|
||||
8. **New**: `Authorization` header strip when `stripAuthorizationHeader` AND `principal.Source == sourceBearer`.
|
||||
9. `t.next.ServeHTTP(rw, req)`.
|
||||
|
||||
Does not call `Save`, does not check `IsDirty`. Session persistence stays with the cookie-path caller.
|
||||
|
||||
### 7.5 `handleBearerRequest(rw, req)` (new in `bearer_auth.go`)
|
||||
|
||||
```
|
||||
1. Detect "Authorization: Bearer <token>" (case-insensitive prefix).
|
||||
2. token = TrimSpace(authHeader[7:]); reject empty.
|
||||
3. p, err := buildPrincipalFromBearerToken(token).
|
||||
On err → 401 with WWW-Authenticate, log reason at debug.
|
||||
4. forwardAuthorized(rw, req, p).
|
||||
```
|
||||
|
||||
Target: ~40 lines.
|
||||
|
||||
### 7.6 Refactor of `processAuthorizedRequest` (modify `middleware.go`)
|
||||
|
||||
Splits along the principal boundary:
|
||||
- Session-specific part (backchannel-logout invalidation, `IsDirty` / `Save`) stays in `processAuthorizedRequest`.
|
||||
- Everything else moves to `forwardAuthorized`.
|
||||
- `processAuthorizedRequest` ends with `forwardAuthorized(rw, req, buildPrincipalFromSession(session))`.
|
||||
|
||||
### 7.7 `verifyOpts` extension to `verifyToken` (modify `token_manager.go`)
|
||||
|
||||
Add a parameter struct:
|
||||
```go
|
||||
type verifyOpts struct {
|
||||
skipReplayMarking bool // suppress JTI Set (token_manager.go:108-143); blacklist Get stays active
|
||||
}
|
||||
```
|
||||
|
||||
Both the type and field are unexported (internal-only knob). Signature change: `verifyToken(token string)` becomes `verifyToken(token string, opts verifyOpts)`. Existing callers pass `verifyOpts{}` (zero value = current behaviour). Bearer path passes `verifyOpts{skipReplayMarking: true}`.
|
||||
|
||||
**Critical semantics — must be reflected in implementation and tests:**
|
||||
- `skipReplayMarking` only gates the **Set** at `token_manager.go:108-143` (the call adding the JTI to the blacklist and replay cache).
|
||||
- The blacklist **Get** at `token_manager.go:44-47, 80-89` stays unconditionally active on the bearer path. Tokens revoked via `RevokeToken` (which adds the JTI to the blacklist) MUST still be rejected on the bearer path.
|
||||
- Must NOT be implemented by mutating `t.disableReplayDetection` (struct field) — that would create a cross-request race that disables replay protection globally.
|
||||
|
||||
A targeted regression test exercises: bearer token verified once → admin calls `RevokeToken` adding the JTI to the blacklist → same token replayed → 401.
|
||||
|
||||
### 7.8 Config additions (modify `settings.go`)
|
||||
|
||||
```go
|
||||
EnableBearerAuth bool `json:"enableBearerAuth,omitempty"`
|
||||
BearerIdentifierClaim string `json:"bearerIdentifierClaim,omitempty"`
|
||||
StripAuthorizationHeader bool `json:"stripAuthorizationHeader,omitempty"`
|
||||
BearerEmitWWWAuthenticate bool `json:"bearerEmitWWWAuthenticate,omitempty"`
|
||||
BearerOverridesCookie bool `json:"bearerOverridesCookie,omitempty"`
|
||||
MaxTokenAgeSeconds int64 `json:"maxTokenAgeSeconds,omitempty"`
|
||||
MaxIdentifierLength int `json:"maxIdentifierLength,omitempty"`
|
||||
BearerFailureThreshold int `json:"bearerFailureThreshold,omitempty"`
|
||||
BearerFailureWindowSeconds int `json:"bearerFailureWindowSeconds,omitempty"`
|
||||
BearerFailurePenaltySeconds int `json:"bearerFailurePenaltySeconds,omitempty"`
|
||||
```
|
||||
|
||||
Defaults (applied in `CreateConfig` for the bearer-related fields; values >0 only honoured when `EnableBearerAuth=true`):
|
||||
- `EnableBearerAuth`: `false`.
|
||||
- `BearerIdentifierClaim`: `"sub"`.
|
||||
- `StripAuthorizationHeader`: `true`.
|
||||
- `BearerEmitWWWAuthenticate`: `true` (RFC 6750 hint enabled by default; flip to false if recon-exposure is a concern).
|
||||
- `BearerOverridesCookie`: `false` (cookie wins when both present; flip to `true` for the legacy/industry-default behaviour).
|
||||
- `MaxTokenAgeSeconds`: `86400` (24h upper bound on `iat`).
|
||||
- `MaxIdentifierLength`: `256`.
|
||||
- `BearerFailureThreshold`: `20` (consecutive 401s per IP before throttle).
|
||||
- `BearerFailureWindowSeconds`: `60`.
|
||||
- `BearerFailurePenaltySeconds`: `60` (429 reply for this long after threshold tripped).
|
||||
|
||||
### 7.9 Startup validation (modify `main.go` `New()`)
|
||||
|
||||
- `EnableBearerAuth && Audience == ""` → fatal error.
|
||||
- `EnableBearerAuth && !StrictAudienceValidation` → warning log (recommended hardening).
|
||||
- `EnableBearerAuth && BearerIdentifierClaim == "email"` → fatal error (the bearer path is M2M and an `email` identifier without `email_verified` enforcement is a spoofing vector; default `BearerIdentifierClaim=sub` avoids this; explicit override to `email` is rejected).
|
||||
- `EnableBearerAuth && MaxTokenAgeSeconds <= 0` → reset to default 86400 with info log.
|
||||
- `EnableBearerAuth && BearerFailureThreshold <= 0` → reset to default 20 with info log.
|
||||
|
||||
## 8. Data Flow
|
||||
|
||||
### 8.1 Bearer path
|
||||
|
||||
```
|
||||
ServeHTTP entry (pre-init paths unchanged: logout, backchannel, frontchannel, excluded URLs, SSE/WS bypass)
|
||||
│
|
||||
├─ enableBearerAuth == false? → fall through to cookie path
|
||||
│
|
||||
└─ enableBearerAuth == true AND Authorization starts with "Bearer "
|
||||
│
|
||||
▼
|
||||
handleBearerRequest
|
||||
│
|
||||
├─ format guards (empty, length, segment count)
|
||||
│
|
||||
▼
|
||||
verifyToken(token, verifyOpts{SkipReplayMarking: true})
|
||||
│ signature, issuer, audience (strict), exp
|
||||
│
|
||||
▼
|
||||
classifyToken(claims) → reject ID tokens
|
||||
│
|
||||
▼
|
||||
if requireTokenIntrospection: introspectToken → active check
|
||||
│
|
||||
▼
|
||||
resolveIdentifier(claims) → sanitiseIdentifier
|
||||
│
|
||||
▼
|
||||
principal{Source: sourceBearer, …}
|
||||
│
|
||||
▼
|
||||
forwardAuthorized(rw, req, principal)
|
||||
│
|
||||
├─ roles/groups gate (403 on deny)
|
||||
├─ header injection
|
||||
├─ header templates
|
||||
├─ security headers
|
||||
├─ strip OIDC cookies (existing)
|
||||
├─ strip Authorization header (new, when configured)
|
||||
└─ next.ServeHTTP(rw, req)
|
||||
```
|
||||
|
||||
### 8.2 Cookie path (refactored, semantically unchanged)
|
||||
|
||||
```
|
||||
processAuthorizedRequest
|
||||
1. Session validity / backchannel-logout invalidation (unchanged).
|
||||
2. principal := buildPrincipalFromSession(session).
|
||||
3. forwardAuthorized(rw, req, principal).
|
||||
4. if session.IsDirty(): session.Save().
|
||||
```
|
||||
|
||||
## 9. Error Handling
|
||||
|
||||
| Trigger | Status | Body | WWW-Authenticate | Debug log reason |
|
||||
|---|---|---|---|---|
|
||||
| Empty bearer after prefix | 401 | `Unauthorized` | `Bearer error="invalid_request"` | empty bearer token |
|
||||
| Token over MaxLength | 401 | `Unauthorized` | `Bearer error="invalid_token"` | token exceeds max length |
|
||||
| Not a 3-segment JWT | 401 | `Unauthorized` | `Bearer error="invalid_token"` | malformed JWT |
|
||||
| Disallowed `alg` (e.g. none, HS*) | 401 | `Unauthorized` | `Bearer error="invalid_token"` | unsupported alg |
|
||||
| Missing/oversized/bad-charset `kid` | 401 | `Unauthorized` | `Bearer error="invalid_token"` | invalid kid |
|
||||
| Signature / issuer / aud / exp fail | 401 | `Unauthorized` | `Bearer error="invalid_token"` | reason from verifyToken (category only) |
|
||||
| `iat` older than MaxTokenAgeSeconds | 401 | `Unauthorized` | `Bearer error="invalid_token"` | token too old (iat outside age bound) |
|
||||
| Multi-aud without matching `azp` | 401 | `Unauthorized` | `Bearer error="invalid_token"` | multi-aud token without azp match |
|
||||
| Detected as ID token | 401 | `Unauthorized` | `Bearer error="invalid_token"` | ID tokens not accepted on bearer path |
|
||||
| JTI blacklisted (revoked) | 401 | `Unauthorized` | `Bearer error="invalid_token"` | token JTI in blacklist |
|
||||
| Introspection `active=false` | 401 | `Unauthorized` | `Bearer error="invalid_token"` | token inactive at IdP |
|
||||
| Introspection endpoint failure | 503 | `Service Unavailable` | (none) | introspection unavailable |
|
||||
| Identifier claim missing/empty | 401 | `Unauthorized` | `Bearer error="invalid_token"` | no identifier claim |
|
||||
| Identifier fails sanitisation | 401 | `Unauthorized` | `Bearer error="invalid_token"` | invalid identifier characters |
|
||||
| Per-IP failure threshold tripped | 429 | `Too Many Requests` | (none); `Retry-After: <BearerFailurePenaltySeconds>` | source IP in penalty box |
|
||||
| Roles/groups not allowed | 403 | `Access denied` | (none) | user not in allowedRolesAndGroups |
|
||||
|
||||
Responses never include token contents, never include the raw failure reason, and never set `Location` headers (API clients cannot follow redirects).
|
||||
|
||||
## 10. Edge Cases
|
||||
|
||||
1. **Both bearer header and cookie session present.** Cookie wins by default (safer against browser/extension/proxy bearer injection). `BearerOverridesCookie=true` flips to bearer-wins. Either way: WARN log includes both source markers so operators can audit.
|
||||
2. **`Authorization: Basic …`.** Not bearer; cookie path runs as today.
|
||||
3. **`Authorization: Bearer ` (trailing space, no value).** Empty after trim → 401.
|
||||
4. **Mixed-case prefix (`bearer`, `BEARER`, `BeArEr`).** Case-insensitive prefix check; token value preserved verbatim.
|
||||
5. **Multiple `Authorization` headers.** Use only the first (Go `http.Header.Get` default). Documented.
|
||||
6. **Bearer during OIDC init wait.** Bearer requests also block on init: we need `issuerURL`, `audience`, JWKs ready. If init fails, bearer requests return 503 just like cookie requests.
|
||||
7. **SSE / WebSocket bypass with bearer.** Bypass paths keep cookie-only behaviour. Operators who want bearer on streaming endpoints must remove SSE/WS bypass. Documented.
|
||||
8. **Logout endpoint with bearer.** Logout runs before bearer detection. Treated as cookie-session logout; bearer token revocation requires IdP-side action.
|
||||
9. **Excluded URLs with bearer.** Bypass excluded URLs as today; bearer not validated on excluded paths. ADDITIONALLY: `Authorization: Bearer` is stripped from the request before forwarding so the token can't leak into the excluded endpoint's downstream logs / metrics scrapers / health checks.
|
||||
10. **Concurrent identical bearer requests.** Existing `tokenCache` is concurrency-safe; no new locking.
|
||||
11. **Client rotates token between requests.** Independent verification per token; independent cache entries.
|
||||
12. **Clock skew.** Use existing `jwt.Verify` leeway. (If absent, add ±30s as a separate change; out of scope here.)
|
||||
|
||||
## 11. Testing Strategy
|
||||
|
||||
### 11.1 Integration tests (new `bearer_auth_test.go`)
|
||||
|
||||
Table-driven test against a real `httptest.Server` and the full `ServeHTTP` flow. Coverage matrix:
|
||||
|
||||
- Valid access token + allowed roles → 200, `next` ran, `X-Forwarded-User` set.
|
||||
- Valid token without configured roles → 200.
|
||||
- Wrong audience, expired, tampered signature → 401, `next` did not run.
|
||||
- ID token presented → 401 (`ID tokens not accepted`).
|
||||
- Malformed JWT (2 segments) → 401.
|
||||
- Oversized token (> MaxLength) → 401.
|
||||
- Empty bearer → 401.
|
||||
- Missing identifier claim → 401.
|
||||
- Identifier containing `\r\n` → 401.
|
||||
- `allowedRolesAndGroups` mismatch → 403.
|
||||
- `allowedRolesAndGroups` match → 200.
|
||||
- `EnableBearerAuth=false` + bearer header → cookie path runs (302 to `/authorize`).
|
||||
- Bearer + valid cookie session → bearer wins, 200.
|
||||
- `StripAuthorizationHeader=true` → downstream sees no `Authorization`.
|
||||
- `StripAuthorizationHeader=false` → downstream sees `Authorization`.
|
||||
- Case variants (`bearer`, `BEARER`) → 200.
|
||||
- SSE bypass + bearer → cookie-only check applies (bearer ignored).
|
||||
- **Replay regression**: same token 1000 times in a row → all 200.
|
||||
- **Cache-evict regression**: same token, force-evict `tokenCache` between iterations (call `tokenCache.Delete` directly), replay → still 200 (verifies `skipReplayMarking` doesn't poison the blacklist).
|
||||
- **Revocation-while-bearer regression**: bearer token verified once → admin calls `RevokeToken` adding JTI to blacklist → same token presented → 401 (verifies blacklist Get stays active on bearer path even with `skipReplayMarking` set).
|
||||
- **Alg-pin: token signed with `alg=none`** → 401, no JWKS fetch happens (verify with a counting mock).
|
||||
- **`kid` injection: 50KB random kid** → 401 immediately, no JWKS fetch.
|
||||
- **Per-IP throttle**: 21 bad bearer requests from same IP within 1 minute → 22nd returns 429 + Retry-After.
|
||||
- **`iat` upper-age**: token with `iat = now - 25h` → 401 (older than 24h default).
|
||||
- **Multi-aud without azp**: aud = `["a", "b"]`, no azp → 401.
|
||||
- **Multi-aud with matching azp**: aud = `["api-aud", "other"]`, azp = clientID → 200.
|
||||
- **Identifier with bidi-override**: sub contains U+202E → 401.
|
||||
- **Identifier with comma**: sub = `"alice,bob"` → 401.
|
||||
- **Identifier over 256 bytes** → 401.
|
||||
- **`UserIdentifierClaim=email` at startup with EnableBearerAuth=true** → startup fails.
|
||||
- **Excluded URL + bearer**: bearer header presented on excluded URL → request forwarded, downstream sees no `Authorization` header (stripped).
|
||||
|
||||
### 11.2 Unit tests (in `bearer_auth_test.go`)
|
||||
|
||||
- `classifyToken`: ID-token detection, access-token detection by `scope`/`scp`/`token_use`, ambiguous → reject.
|
||||
- `resolveIdentifier`: precedence (`userIdentifierClaim` → `sub` → `client_id`/`azp`); missing → error; empty string → error.
|
||||
- `sanitizeIdentifier`: rejects all `unicode.IsControl`; accepts email/sub-style values.
|
||||
|
||||
### 11.3 Introspection tests (`bearer_auth_introspection_test.go`)
|
||||
|
||||
- Token valid + introspection `active=true` → 200.
|
||||
- Token valid + introspection `active=false` → 401.
|
||||
- Introspection endpoint 500 → 503.
|
||||
- Second request hits introspection cache (no second HTTP call).
|
||||
|
||||
### 11.4 Startup validation tests (extend `settings_test.go` / `main_test.go`)
|
||||
|
||||
- `EnableBearerAuth=true, Audience=""` → `New()` errors.
|
||||
- `EnableBearerAuth=true, StrictAudienceValidation=false` → succeeds with warning.
|
||||
- `EnableBearerAuth=false` → no validation; existing tests untouched.
|
||||
|
||||
### 11.5 Cookie-path regression suite
|
||||
|
||||
- All existing `TestServeHTTP_*` tests in `main_servehttp_test.go` pass unmodified.
|
||||
- Add: cookie session, `EnableBearerAuth=true`, no bearer header → identical behaviour to baseline.
|
||||
- Add: dirty session still triggers `Save()` after refactor.
|
||||
|
||||
### 11.6 Principal invariants
|
||||
|
||||
- `buildPrincipalFromSession`: `Source == sourceSession`; `IDToken` / `RefreshToken` populated when present in session.
|
||||
- `buildPrincipalFromBearerToken`: `Source == sourceBearer`; `IDToken == ""`, `RefreshToken == ""`.
|
||||
- `forwardAuthorized` produces identical headers for equivalent principals regardless of source.
|
||||
|
||||
### 11.7 Coverage gate
|
||||
|
||||
- New code in `bearer_auth.go` and `principal.go`: ≥ 90% line coverage.
|
||||
- `forwardAuthorized` coverage ≥ existing `processAuthorizedRequest` coverage baseline.
|
||||
|
||||
### 11.8 Out of scope (follow-ups)
|
||||
|
||||
- Load test of bearer vs cookie hot path.
|
||||
- Fuzzing the JWT parser.
|
||||
- Additional auth methods (mTLS, API keys) — design enables them, but they are separate work.
|
||||
|
||||
## 12. Migration / Rollout
|
||||
|
||||
Default-off. Existing deployments observe no behavioural change. Operators opt in by setting:
|
||||
|
||||
```yaml
|
||||
enableBearerAuth: true
|
||||
audience: https://api.example.com # required when bearer enabled
|
||||
# optional:
|
||||
stripAuthorizationHeader: true # default
|
||||
requireTokenIntrospection: false # default; set true for real-time revocation
|
||||
userIdentifierClaim: client_id # optional override; defaults to sub fallback chain
|
||||
```
|
||||
|
||||
Documentation: update `docs/CONFIGURATION.md` with a bearer-auth section, and add a new `docs/BEARER_AUTH.md` covering the security model, threat assumptions (token issuer is trusted; audience must be set; bearer means trust the issuer's revocation policy unless introspection enabled), and recommended configurations for common IdPs.
|
||||
|
||||
## 13. Security Considerations
|
||||
|
||||
| Concern | Mitigation |
|
||||
|---|---|
|
||||
| Token confusion (ID token used as bearer) | Reuse `detectTokenType` (`token_manager.go:187-303`) which checks `nonce`, `typ: at+jwt`, `token_use`, `scope`, aud-vs-clientID. Belt-and-braces: explicit `nonce` + `token_use == "id"` rejection on top. |
|
||||
| Audience confusion (token for service B accepted by A) | `Audience` mandatory at startup; verified via existing `VerifyJWTSignatureAndClaims`; multi-aud tokens require matching `azp == clientID`. |
|
||||
| Replay-via-blacklist false positive | `verifyOpts{skipReplayMarking: true}` on bearer path. Gates ONLY the Set; the Get stays so revoked tokens still fail. |
|
||||
| Revocation lag | Optional RFC 7662 introspection. Bearer-path introspection cache TTL capped at 60s. Set `RequireTokenIntrospection=true` for real-time revocation. |
|
||||
| `alg`-confusion / `alg=none` attacks | Hard-pin asymmetric allowlist at bearer entry, **before** JWKS fetch. Prevents wasted upstream calls and locks out HS/none probes. |
|
||||
| `kid` injection / JWKS amplification | `kid` length cap (256 bytes) + charset allowlist enforced at bearer entry. |
|
||||
| Bearer 401 brute-force / oracle | Per-IP `failedBearerAttempts` cache; configurable threshold + penalty box returning 429 + `Retry-After`. |
|
||||
| `iat` clock-manipulation / forever-tokens | `MaxTokenAgeSeconds` upper bound (default 24h); cookie path unchanged. |
|
||||
| Identifier-driven header injection | `sanitizeIdentifier`: length cap, control-char + bidi-override + `,;=` rejection. `net/http` rejects CRLF on the wire too (defence in depth). |
|
||||
| Token leakage downstream | `StripAuthorizationHeader=true` by default. Also: `Authorization` stripped on excluded-URL requests so bearer can't leak into health/metrics downstream logs. |
|
||||
| Token-in-logs | All log paths log reason categories, not raw tokens. Identifier hashed via SHA-256 truncated to 8 hex chars before any info/warn-level emission (full identifier only at debug). New `safeLogAuthEvent(category, hashedIdentifier, reasonCode)` helper makes this hard to misuse. |
|
||||
| `email` claim spoofing | Startup fails if `EnableBearerAuth && UserIdentifierClaim == "email"`. Future human-user bearer iteration must add `email_verified` enforcement. |
|
||||
| Bypass on SSE / WS endpoints | SSE/WS bypass keeps cookie-only behaviour; bearer ignored. Operators choose to widen if needed. |
|
||||
| Mixed bearer + cookie precedence | Cookie wins by default (safer for browser scenarios); `BearerOverridesCookie=true` flips. WARN log on both-present requests. |
|
||||
| Configuration drift (operator forgets audience) | Startup fails when `EnableBearerAuth=true && Audience==""`. |
|
||||
| Downstream blast radius when `StripAuthorizationHeader=false` | Documented: forwarded bearer extends token's blast radius to all downstream services. Logs at those services become token stores. Operators must treat downstream log policy accordingly. |
|
||||
| Introspection auth method (pre-existing gap, called out) | `token_introspection.go:80` uses `client_secret_basic` only; does not honour `private_key_jwt`. Out of scope for this PR but documented as a follow-up; operators using `ClientAuthMethod=private_key_jwt` + `RequireTokenIntrospection=true` should be aware introspection will use basic auth. |
|
||||
|
||||
## 14. Open Questions
|
||||
|
||||
None — all design decisions resolved during brainstorming + security review. Implementation may surface incidental questions (e.g. exact clock-skew leeway in `jwt.Verify`); those are out of scope for this spec and handled in the implementation plan.
|
||||
|
||||
## 14a. Security Review Reference
|
||||
|
||||
This design was reviewed by the `security-reviewer` subagent on 2026-05-18. Findings incorporated:
|
||||
|
||||
- **Critical**: C1 (classifier reuses `detectTokenType`), C2 (sub fallback dropped — unreachable due to `jwt.go:416`), C3 (replay-marking gates only Set, not Get; revocation regression test added).
|
||||
- **High**: H1 (alg pinned at bearer entry), H2 (kid length + charset), H3 (cookie wins by default, configurable), H4 (per-IP 401 throttle), H5 (multi-aud requires azp).
|
||||
- **Medium**: M1 (identifier max-length + bidi reject + delimiter chars), M2 (introspection cache TTL capped at 60s on bearer path), M4 (log-hashing via SHA-256[:8]), M5 (StripAuth blast-radius documented), M6 (iat upper-age bound), M7 (Authorization stripped on excluded URLs).
|
||||
- **Low/Nit**: L2 (renamed to `BearerEmitWWWAuthenticate`), N3 (startup rejects `UserIdentifierClaim=email`).
|
||||
- **Documented as pre-existing gaps (follow-up PRs)**: M3 (introspection auth method doesn't honour `private_key_jwt`).
|
||||
|
||||
## 15. Implementation Plan Reference
|
||||
|
||||
To be produced by the `writing-plans` skill in a follow-up document at `docs/superpowers/plans/2026-05-18-bearer-token-auth-plan.md`. The plan decomposes this design into ordered, independently-testable PRs.
|
||||
Vendored
+12
-12
@@ -164,7 +164,7 @@ func (h *HybridBackend) Set(ctx context.Context, key string, value []byte, ttl t
|
||||
|
||||
// Check if we're in fallback mode
|
||||
if h.fallbackMode.Load() {
|
||||
h.logger.Debugf("Operating in fallback mode, skipping L2 write for key: %s", key)
|
||||
h.logger.Debugf("Operating in fallback mode, skipping L2 write for key: %s", redactKey(key))
|
||||
return nil // Don't fail the operation if L2 is down
|
||||
}
|
||||
|
||||
@@ -176,13 +176,13 @@ func (h *HybridBackend) Set(ctx context.Context, key string, value []byte, ttl t
|
||||
// Synchronous write for critical cache types
|
||||
if err := h.secondary.Set(ctx, key, value, ttl); err != nil {
|
||||
h.errors.Add(1)
|
||||
h.logger.Warnf("Failed to write to L2 cache (sync) for key %s: %v", key, err)
|
||||
h.logger.Warnf("Failed to write to L2 cache (sync) for key %s: %v", redactKey(key), err)
|
||||
h.recordL2Error()
|
||||
// Don't fail the operation - L1 write succeeded
|
||||
return nil
|
||||
}
|
||||
h.l2Writes.Add(1)
|
||||
h.logger.Debugf("Synchronous write to L2 completed for critical key: %s", key)
|
||||
h.logger.Debugf("Synchronous write to L2 completed for critical key: %s", redactKey(key))
|
||||
} else {
|
||||
// Asynchronous write for non-critical cache types
|
||||
select {
|
||||
@@ -192,10 +192,10 @@ func (h *HybridBackend) Set(ctx context.Context, key string, value []byte, ttl t
|
||||
ttl: ttl,
|
||||
ctx: ctx,
|
||||
}:
|
||||
h.logger.Debugf("Queued async write to L2 for key: %s", key)
|
||||
h.logger.Debugf("Queued async write to L2 for key: %s", redactKey(key))
|
||||
default:
|
||||
// Buffer is full, log and continue
|
||||
h.logger.Warnf("Async write buffer full, dropping L2 write for key: %s", key)
|
||||
h.logger.Warnf("Async write buffer full, dropping L2 write for key: %s", redactKey(key))
|
||||
h.errors.Add(1)
|
||||
}
|
||||
}
|
||||
@@ -209,7 +209,7 @@ func (h *HybridBackend) Get(ctx context.Context, key string) ([]byte, time.Durat
|
||||
value, ttl, exists, err := h.primary.Get(ctx, key)
|
||||
if err != nil {
|
||||
h.errors.Add(1)
|
||||
h.logger.Debugf("L1 get error for key %s: %v", key, err)
|
||||
h.logger.Debugf("L1 get error for key %s: %v", redactKey(key), err)
|
||||
}
|
||||
|
||||
if exists {
|
||||
@@ -227,7 +227,7 @@ func (h *HybridBackend) Get(ctx context.Context, key string) ([]byte, time.Durat
|
||||
value, ttl, exists, err = h.secondary.Get(ctx, key)
|
||||
if err != nil {
|
||||
h.errors.Add(1)
|
||||
h.logger.Debugf("L2 get error for key %s: %v", key, err)
|
||||
h.logger.Debugf("L2 get error for key %s: %v", redactKey(key), err)
|
||||
h.recordL2Error()
|
||||
h.misses.Add(1)
|
||||
return nil, 0, false, nil // Don't propagate L2 errors
|
||||
@@ -544,7 +544,7 @@ func (h *HybridBackend) queueL1Backfill(key string, value []byte, ttl time.Durat
|
||||
case h.l1BackfillBuffer <- &l1BackfillItem{key: key, value: value, ttl: ttl}:
|
||||
default:
|
||||
h.l1BackfillDrops.Add(1)
|
||||
h.logger.Debugf("L1 backfill buffer full, dropping for key: %s", key)
|
||||
h.logger.Debugf("L1 backfill buffer full, dropping for key: %s", redactKey(key))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -576,9 +576,9 @@ func (h *HybridBackend) l1BackfillWorker() {
|
||||
}
|
||||
writeCtx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||
if err := h.primary.Set(writeCtx, item.key, item.value, item.ttl); err != nil {
|
||||
h.logger.Debugf("Failed to populate L1 cache from L2 for key %s: %v", item.key, err)
|
||||
h.logger.Debugf("Failed to populate L1 cache from L2 for key %s: %v", redactKey(item.key), err)
|
||||
} else {
|
||||
h.logger.Debugf("Populated L1 cache from L2 for key: %s", item.key)
|
||||
h.logger.Debugf("Populated L1 cache from L2 for key: %s", redactKey(item.key))
|
||||
}
|
||||
cancel()
|
||||
}
|
||||
@@ -619,11 +619,11 @@ func (h *HybridBackend) asyncWriteWorker() {
|
||||
writeCtx, cancel := context.WithTimeout(item.ctx, 500*time.Millisecond)
|
||||
if err := h.secondary.Set(writeCtx, item.key, item.value, item.ttl); err != nil {
|
||||
h.errors.Add(1)
|
||||
h.logger.Debugf("Async write to L2 failed for key %s: %v", item.key, err)
|
||||
h.logger.Debugf("Async write to L2 failed for key %s: %v", redactKey(item.key), err)
|
||||
h.recordL2Error()
|
||||
} else {
|
||||
h.l2Writes.Add(1)
|
||||
h.logger.Debugf("Async write to L2 completed for key: %s", item.key)
|
||||
h.logger.Debugf("Async write to L2 completed for key: %s", redactKey(item.key))
|
||||
}
|
||||
cancel()
|
||||
}
|
||||
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
// Package backends provides cache backend implementations for the Traefik OIDC plugin.
|
||||
package backends
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
)
|
||||
|
||||
// redactKey returns a short, deterministic hash prefix of a cache key for use
|
||||
// in debug/info log lines. Cache keys in this plugin can include raw access /
|
||||
// refresh / id tokens (any caller may pass an arbitrary string), and CodeQL
|
||||
// flags `key=%s` formatters as a clear-text-logging sink for HTTP-header-
|
||||
// sourced taint. The hash preserves cache-key uniqueness in logs (same key →
|
||||
// same hash, useful for correlating a problematic key across log lines) while
|
||||
// keeping the raw value out of disk-resident log streams.
|
||||
//
|
||||
// 8 hex chars (32 bits) is enough to disambiguate at human-debugging scale
|
||||
// without making the hash itself a useful lookup primitive for an attacker
|
||||
// who only has the log stream.
|
||||
func redactKey(key string) string {
|
||||
if key == "" {
|
||||
return "(empty)"
|
||||
}
|
||||
sum := sha256.Sum256([]byte(key))
|
||||
return hex.EncodeToString(sum[:4])
|
||||
}
|
||||
Vendored
+2
-2
@@ -190,7 +190,7 @@ func (c *Cache) Set(key string, value interface{}, ttl time.Duration) error {
|
||||
c.currentSize++
|
||||
atomic.AddInt64(&c.sets, 1)
|
||||
|
||||
c.logger.Debugf("Cache: Set key=%s, size=%d, ttl=%v", key, size, ttl)
|
||||
c.logger.Debugf("Cache: Set key=%s, size=%d, ttl=%v", redactKey(key), size, ttl)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -346,7 +346,7 @@ func (c *Cache) evictLRU() {
|
||||
item, _ := elem.Value.(*Item) // Safe to ignore: type assertion from known type
|
||||
c.removeItem(item.Key, item)
|
||||
atomic.AddInt64(&c.evictions, 1)
|
||||
c.logger.Debugf("Cache: Evicted LRU item key=%s", item.Key)
|
||||
c.logger.Debugf("Cache: Evicted LRU item key=%s", redactKey(item.Key))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Vendored
+22
@@ -0,0 +1,22 @@
|
||||
// Package cache provides the in-memory cache implementation for the Traefik
|
||||
// OIDC plugin.
|
||||
package cache
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
)
|
||||
|
||||
// redactKey returns a short, deterministic hash prefix of a cache key for use
|
||||
// in debug/info log lines. Cache keys may include raw access / refresh / id
|
||||
// tokens (callers pass arbitrary strings) and CodeQL flags `key=%s`
|
||||
// formatters as a clear-text-logging sink for HTTP-header-sourced taint.
|
||||
// The hash preserves uniqueness in logs (same key → same hash) while keeping
|
||||
// the raw value out of disk-resident log streams.
|
||||
func redactKey(key string) string {
|
||||
if key == "" {
|
||||
return "(empty)"
|
||||
}
|
||||
sum := sha256.Sum256([]byte(key))
|
||||
return hex.EncodeToString(sum[:4])
|
||||
}
|
||||
@@ -0,0 +1,449 @@
|
||||
package traefikoidc
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/sessions"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
// signGraphStyleAccessToken builds a JWT in Microsoft's Graph proprietary
|
||||
// nonce-header form: bytes that get signed contain the SHA256 hash of the
|
||||
// nonce, while the wire token ships the original nonce. A standard JWS
|
||||
// verifier always rejects these with `crypto/rsa: verification error`, which
|
||||
// is why Microsoft documents Graph access tokens as opaque to client apps:
|
||||
//
|
||||
// https://learn.microsoft.com/en-us/entra/identity-platform/access-tokens
|
||||
// "you can't validate tokens for Microsoft Graph according to these rules
|
||||
// due to their proprietary format"
|
||||
func signGraphStyleAccessToken(t *testing.T, key *rsa.PrivateKey, kid, originalNonce string, claims map[string]any) string {
|
||||
t.Helper()
|
||||
|
||||
wireHeader := map[string]any{
|
||||
"alg": "RS256",
|
||||
"kid": kid,
|
||||
"typ": "JWT",
|
||||
"nonce": originalNonce,
|
||||
}
|
||||
wireHeaderJSON, err := json.Marshal(wireHeader)
|
||||
require.NoError(t, err)
|
||||
|
||||
hashed := sha256.Sum256([]byte(originalNonce))
|
||||
signedHeader := map[string]any{
|
||||
"alg": "RS256",
|
||||
"kid": kid,
|
||||
"typ": "JWT",
|
||||
"nonce": fmt.Sprintf("%x", hashed),
|
||||
}
|
||||
signedHeaderJSON, err := json.Marshal(signedHeader)
|
||||
require.NoError(t, err)
|
||||
|
||||
claimsJSON, err := json.Marshal(claims)
|
||||
require.NoError(t, err)
|
||||
|
||||
wireHeaderB64 := base64.RawURLEncoding.EncodeToString(wireHeaderJSON)
|
||||
signedHeaderB64 := base64.RawURLEncoding.EncodeToString(signedHeaderJSON)
|
||||
claimsB64 := base64.RawURLEncoding.EncodeToString(claimsJSON)
|
||||
|
||||
signedInput := signedHeaderB64 + "." + claimsB64
|
||||
hSign := sha256.Sum256([]byte(signedInput))
|
||||
sig, err := rsa.SignPKCS1v15(rand.Reader, key, crypto.SHA256, hSign[:])
|
||||
require.NoError(t, err)
|
||||
|
||||
return wireHeaderB64 + "." + claimsB64 + "." + base64.RawURLEncoding.EncodeToString(sig)
|
||||
}
|
||||
|
||||
// newAzureFollowupOIDC produces a TraefikOidc instance wired for an Azure
|
||||
// AD tenant with a captured error log buffer. Used by the issue #134 followup
|
||||
// tests to assert log behavior during validateAzureTokens flows.
|
||||
func newAzureFollowupOIDC(t *testing.T, jwks *JWKSet) (*TraefikOidc, *bytes.Buffer) {
|
||||
t.Helper()
|
||||
tc := newTestCleanup(t)
|
||||
|
||||
errBuf := &bytes.Buffer{}
|
||||
logger := &Logger{
|
||||
logError: log.New(errBuf, "", 0),
|
||||
logInfo: log.New(io.Discard, "", 0),
|
||||
logDebug: log.New(io.Discard, "", 0),
|
||||
}
|
||||
|
||||
tokenCache := tc.addTokenCache(NewTokenCache())
|
||||
tokenBlacklist := tc.addCache(NewCache())
|
||||
|
||||
oidc := &TraefikOidc{
|
||||
issuerURL: "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
clientID: "test-client-id",
|
||||
audience: "test-client-id",
|
||||
jwksURL: "https://login.microsoftonline.com/tenant-id/discovery/v2.0/keys",
|
||||
limiter: rate.NewLimiter(rate.Every(time.Second), 100),
|
||||
logger: logger,
|
||||
httpClient: &http.Client{Timeout: 10 * time.Second},
|
||||
jwkCache: &MockJWKCache{JWKS: jwks},
|
||||
tokenCache: tokenCache,
|
||||
tokenBlacklist: tokenBlacklist,
|
||||
extractClaimsFunc: extractClaims,
|
||||
}
|
||||
oidc.tokenVerifier = oidc
|
||||
oidc.jwtVerifier = oidc
|
||||
require.True(t, oidc.isAzureProvider(), "fixture must be detected as Azure provider")
|
||||
return oidc, errBuf
|
||||
}
|
||||
|
||||
// authedSessionWithTokens returns a SessionData populated with the supplied
|
||||
// access and ID tokens, marked authenticated and recently created. The
|
||||
// SessionManager carries a real ChunkManager so that GetAccessToken /
|
||||
// GetIDToken / GetRefreshToken behave like the production code path.
|
||||
func authedSessionWithTokens(t *testing.T, accessToken, idToken string) *SessionData {
|
||||
t.Helper()
|
||||
|
||||
chunkLogger := NewLogger("error")
|
||||
chunkManager := NewChunkManager(chunkLogger)
|
||||
t.Cleanup(chunkManager.Shutdown)
|
||||
|
||||
sd := CreateMockSessionData()
|
||||
sd.manager = &SessionManager{
|
||||
sessionMaxAge: 24 * time.Hour,
|
||||
chunkManager: chunkManager,
|
||||
logger: chunkLogger,
|
||||
}
|
||||
|
||||
sd.mainSession = sessions.NewSession(nil, "main")
|
||||
sd.mainSession.Values["authenticated"] = true
|
||||
sd.mainSession.Values["created_at"] = time.Now().Unix()
|
||||
|
||||
sd.accessSession = sessions.NewSession(nil, "access")
|
||||
sd.accessSession.Values["token"] = accessToken
|
||||
sd.accessSession.Values["compressed"] = false
|
||||
|
||||
sd.idTokenSession = sessions.NewSession(nil, "id")
|
||||
sd.idTokenSession.Values["token"] = idToken
|
||||
sd.idTokenSession.Values["compressed"] = false
|
||||
|
||||
sd.refreshSession = sessions.NewSession(nil, "refresh")
|
||||
sd.refreshSession.Values["token"] = ""
|
||||
sd.refreshSession.Values["compressed"] = false
|
||||
|
||||
return sd
|
||||
}
|
||||
|
||||
// TestIssue134_Followup_GraphAccessTokenReproducesUsersError sanity-checks
|
||||
// that our crafted Graph-style token reproduces the exact rsa error string
|
||||
// quoted on the issue thread (dada-engineer 2026-05-08, friek 2026-05-11).
|
||||
//
|
||||
// Sanity test: must always pass, regardless of the issue #134 followup fix.
|
||||
// It exists so a future contributor does not accidentally weaken the
|
||||
// reproducer and assume the followup fix is no longer needed.
|
||||
func TestIssue134_Followup_GraphAccessTokenReproducesUsersError(t *testing.T) {
|
||||
rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
|
||||
const kid = "azure-followup-kid"
|
||||
graphToken := signGraphStyleAccessToken(t, rsaKey, kid, "wire-only-nonce", map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "00000003-0000-0000-c000-000000000000",
|
||||
"exp": time.Now().Add(time.Hour).Unix(),
|
||||
"iat": time.Now().Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"scp": "User.Read",
|
||||
})
|
||||
|
||||
parsedJWT, err := parseJWT(graphToken)
|
||||
require.NoError(t, err)
|
||||
pubKey := &rsaKey.PublicKey
|
||||
alg, _ := parsedJWT.Header["alg"].(string)
|
||||
verifyErr := verifySignatureWithKey(graphToken, pubKey, alg)
|
||||
require.Error(t, verifyErr)
|
||||
assert.Contains(t, verifyErr.Error(), "crypto/rsa: verification error",
|
||||
"reproducer must emit the exact error string reported on issue #134")
|
||||
}
|
||||
|
||||
// TestIssue134_Followup_ValidateAzureTokensSkipsGraphAccessToken is the
|
||||
// failing-then-passing test for the followup fix.
|
||||
//
|
||||
// Symptom (before fix): validateAzureTokens calls verifyToken on every
|
||||
// JWT-shaped access token. For Microsoft Graph access tokens (the default
|
||||
// when no custom resource is registered), verification always fails with
|
||||
// `crypto/rsa: verification error`, generating two error log lines per
|
||||
// request:
|
||||
//
|
||||
// UNKNOWN token verification failed: signature verification failed:
|
||||
// crypto/rsa: verification error
|
||||
// DIAGNOSTIC: Signature verification failed for kid=<kid>, alg=RS256:
|
||||
// crypto/rsa: verification error
|
||||
//
|
||||
// Microsoft's own documentation tells client apps not to validate Graph
|
||||
// access tokens. The fix matches that guidance: when an Azure access token
|
||||
// carries Microsoft's proprietary `nonce` JWT header, treat it as opaque
|
||||
// (skip JWT verification, fall through to ID token validation).
|
||||
func TestIssue134_Followup_ValidateAzureTokensSkipsGraphAccessToken(t *testing.T) {
|
||||
rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
|
||||
const kid = "azure-followup-kid"
|
||||
jwk := JWK{
|
||||
Kty: "RSA",
|
||||
Use: "sig",
|
||||
Alg: "RS256",
|
||||
Kid: kid,
|
||||
N: base64.RawURLEncoding.EncodeToString(rsaKey.N.Bytes()),
|
||||
E: base64.RawURLEncoding.EncodeToString(big.NewInt(int64(rsaKey.E)).Bytes()),
|
||||
}
|
||||
jwks := &JWKSet{Keys: []JWK{jwk}}
|
||||
|
||||
now := time.Now()
|
||||
exp := now.Add(time.Hour).Unix()
|
||||
|
||||
graphAccessToken := signGraphStyleAccessToken(t, rsaKey, kid, "wire-only-nonce-azure-graph", map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "00000003-0000-0000-c000-000000000000",
|
||||
"exp": exp,
|
||||
"iat": now.Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"appid": "test-client-id",
|
||||
"scp": "User.Read",
|
||||
})
|
||||
|
||||
idToken, err := createTestJWT(rsaKey, "RS256", kid, map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "test-client-id",
|
||||
"exp": exp,
|
||||
"iat": now.Add(-2 * time.Minute).Unix(),
|
||||
"nbf": now.Add(-2 * time.Minute).Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"email": "user@example.com",
|
||||
"nonce": "id-token-oidc-nonce",
|
||||
"jti": "id-token-jti-followup",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
oidc, errBuf := newAzureFollowupOIDC(t, jwks)
|
||||
session := authedSessionWithTokens(t, graphAccessToken, idToken)
|
||||
|
||||
authenticated, needsRefresh, expired := oidc.validateAzureTokens(session)
|
||||
|
||||
output := errBuf.String()
|
||||
assert.NotContains(t, output, "crypto/rsa: verification error",
|
||||
"validateAzureTokens must not log rsa verification error for Graph-style access tokens; got: %q", output)
|
||||
assert.NotContains(t, output, "DIAGNOSTIC: Signature verification failed",
|
||||
"DIAGNOSTIC line must not fire for Graph-style access tokens; got: %q", output)
|
||||
assert.NotContains(t, output, "UNKNOWN token verification failed",
|
||||
"UNKNOWN classification log must not fire for Graph-style access tokens; got: %q", output)
|
||||
|
||||
assert.True(t, authenticated, "session must remain authenticated via the ID token fallback")
|
||||
assert.False(t, needsRefresh, "valid ID token must not signal a refresh need")
|
||||
assert.False(t, expired, "valid ID token must not be reported as expired")
|
||||
}
|
||||
|
||||
// TestIssue134_Followup_IsUnverifiableAzureAccessToken_Detection covers the
|
||||
// classifier added by the followup fix. Pure-function unit test for the
|
||||
// Microsoft proprietary marker we rely on (nonce in JWT header).
|
||||
func TestIssue134_Followup_IsUnverifiableAzureAccessToken_Detection(t *testing.T) {
|
||||
rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
|
||||
const kid = "azure-detection-kid"
|
||||
standardToken, err := createTestJWT(rsaKey, "RS256", kid, map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "test-client-id",
|
||||
"exp": time.Now().Add(time.Hour).Unix(),
|
||||
"iat": time.Now().Unix(),
|
||||
"sub": "user-azure-id",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
graphToken := signGraphStyleAccessToken(t, rsaKey, kid, "wire-only-nonce", map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "00000003-0000-0000-c000-000000000000",
|
||||
"exp": time.Now().Add(time.Hour).Unix(),
|
||||
"iat": time.Now().Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"scp": "User.Read",
|
||||
})
|
||||
|
||||
oidc, _ := newAzureFollowupOIDC(t, &JWKSet{})
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
token string
|
||||
wantUnverified bool
|
||||
}{
|
||||
{name: "standard JWT without nonce header", token: standardToken, wantUnverified: false},
|
||||
{name: "Microsoft proprietary token (nonce in header)", token: graphToken, wantUnverified: true},
|
||||
{name: "garbage token treated as unverifiable", token: "not-a-jwt-at-all", wantUnverified: true},
|
||||
{name: "empty token treated as unverifiable", token: "", wantUnverified: true},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := oidc.isUnverifiableAzureAccessToken(tc.token)
|
||||
assert.Equal(t, tc.wantUnverified, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIssue134_Followup_StandardAzureAccessTokenStillVerifies guards against
|
||||
// regression in the happy path: an access token issued for our own clientID
|
||||
// (custom Azure-registered API) — no proprietary nonce header, signed normally
|
||||
// — must still flow through the standard verification path and authenticate.
|
||||
func TestIssue134_Followup_StandardAzureAccessTokenStillVerifies(t *testing.T) {
|
||||
rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
|
||||
const kid = "azure-standard-kid"
|
||||
jwk := JWK{
|
||||
Kty: "RSA", Use: "sig", Alg: "RS256", Kid: kid,
|
||||
N: base64.RawURLEncoding.EncodeToString(rsaKey.N.Bytes()),
|
||||
E: base64.RawURLEncoding.EncodeToString(big.NewInt(int64(rsaKey.E)).Bytes()),
|
||||
}
|
||||
jwks := &JWKSet{Keys: []JWK{jwk}}
|
||||
|
||||
now := time.Now()
|
||||
exp := now.Add(time.Hour).Unix()
|
||||
|
||||
// Custom-resource access token: aud points to the app, no nonce header.
|
||||
accessToken, err := createTestJWT(rsaKey, "RS256", kid, map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "test-client-id",
|
||||
"exp": exp,
|
||||
"iat": now.Add(-2 * time.Minute).Unix(),
|
||||
"nbf": now.Add(-2 * time.Minute).Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"scp": "api.read",
|
||||
"jti": "standard-access-jti",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
idToken, err := createTestJWT(rsaKey, "RS256", kid, map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "test-client-id",
|
||||
"exp": exp,
|
||||
"iat": now.Add(-2 * time.Minute).Unix(),
|
||||
"nbf": now.Add(-2 * time.Minute).Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"email": "user@example.com",
|
||||
"nonce": "id-token-oidc-nonce",
|
||||
"jti": "standard-id-jti",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
oidc, errBuf := newAzureFollowupOIDC(t, jwks)
|
||||
session := authedSessionWithTokens(t, accessToken, idToken)
|
||||
|
||||
authenticated, needsRefresh, expired := oidc.validateAzureTokens(session)
|
||||
|
||||
assert.True(t, authenticated, "standard Azure access token must verify and authenticate")
|
||||
assert.False(t, needsRefresh)
|
||||
assert.False(t, expired)
|
||||
assert.NotContains(t, errBuf.String(), "crypto/rsa: verification error",
|
||||
"standard Azure token must not produce signature errors")
|
||||
}
|
||||
|
||||
// TestIssue134_Followup_GraphAccessTokenWithoutIDToken covers the edge where
|
||||
// the session has only a Graph access token (no ID token). The classifier must
|
||||
// preserve the existing "treat as opaque" semantics for backward compatibility:
|
||||
// authenticated=true even when there is no ID token to verify.
|
||||
func TestIssue134_Followup_GraphAccessTokenWithoutIDToken(t *testing.T) {
|
||||
rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
|
||||
const kid = "azure-no-idt-kid"
|
||||
jwk := JWK{
|
||||
Kty: "RSA", Use: "sig", Alg: "RS256", Kid: kid,
|
||||
N: base64.RawURLEncoding.EncodeToString(rsaKey.N.Bytes()),
|
||||
E: base64.RawURLEncoding.EncodeToString(big.NewInt(int64(rsaKey.E)).Bytes()),
|
||||
}
|
||||
jwks := &JWKSet{Keys: []JWK{jwk}}
|
||||
|
||||
graphAccessToken := signGraphStyleAccessToken(t, rsaKey, kid, "wire-only-nonce-no-idt", map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "00000003-0000-0000-c000-000000000000",
|
||||
"exp": time.Now().Add(time.Hour).Unix(),
|
||||
"iat": time.Now().Unix(),
|
||||
"sub": "user-azure-id",
|
||||
"scp": "User.Read",
|
||||
})
|
||||
|
||||
oidc, errBuf := newAzureFollowupOIDC(t, jwks)
|
||||
session := authedSessionWithTokens(t, graphAccessToken, "")
|
||||
|
||||
authenticated, needsRefresh, expired := oidc.validateAzureTokens(session)
|
||||
|
||||
assert.True(t, authenticated, "Graph token without ID token must remain authenticated (matches existing opaque-token semantics)")
|
||||
assert.False(t, needsRefresh)
|
||||
assert.False(t, expired)
|
||||
assert.NotContains(t, errBuf.String(), "crypto/rsa: verification error")
|
||||
}
|
||||
|
||||
// TestIssue134_Followup_ConfusedDeputyAttackDoesNotBypassVerification proves
|
||||
// the classifier is not a security regression. An attacker who forges a JWT
|
||||
// with a `nonce` JWT header (Microsoft's proprietary marker) but a payload
|
||||
// claiming `aud=our-clientID` should NOT gain authenticated status simply by
|
||||
// triggering the "treat as opaque" branch.
|
||||
//
|
||||
// This is the confused-deputy guardrail Microsoft warns about
|
||||
// (https://cwe.mitre.org/data/definitions/441.html): we treat the access token
|
||||
// as opaque, which means we DO NOT authorize from it — authorization comes
|
||||
// only from a separately verifiable ID token. An attacker without a valid ID
|
||||
// token must not be authenticated.
|
||||
func TestIssue134_Followup_ConfusedDeputyAttackDoesNotBypassVerification(t *testing.T) {
|
||||
rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
attackerKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
require.NoError(t, err)
|
||||
|
||||
const kid = "azure-attack-kid"
|
||||
jwk := JWK{
|
||||
Kty: "RSA", Use: "sig", Alg: "RS256", Kid: kid,
|
||||
N: base64.RawURLEncoding.EncodeToString(rsaKey.N.Bytes()),
|
||||
E: base64.RawURLEncoding.EncodeToString(big.NewInt(int64(rsaKey.E)).Bytes()),
|
||||
}
|
||||
jwks := &JWKSet{Keys: []JWK{jwk}}
|
||||
|
||||
// Forged: attacker uses their OWN key, sets aud = our clientID, plants a
|
||||
// `nonce` header to trip the opaque-detection path.
|
||||
forgedAccessToken := signGraphStyleAccessToken(t, attackerKey, kid, "attacker-nonce", map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "test-client-id",
|
||||
"exp": time.Now().Add(time.Hour).Unix(),
|
||||
"iat": time.Now().Unix(),
|
||||
"sub": "attacker",
|
||||
"scp": "admin",
|
||||
})
|
||||
|
||||
// Forged ID token signed with the attacker's key — must fail verification
|
||||
// against the tenant JWKS.
|
||||
forgedIDToken, err := createTestJWT(attackerKey, "RS256", kid, map[string]any{
|
||||
"iss": "https://login.microsoftonline.com/tenant-id/v2.0",
|
||||
"aud": "test-client-id",
|
||||
"exp": time.Now().Add(time.Hour).Unix(),
|
||||
"iat": time.Now().Add(-2 * time.Minute).Unix(),
|
||||
"nbf": time.Now().Add(-2 * time.Minute).Unix(),
|
||||
"sub": "attacker",
|
||||
"email": "attacker@evil.example",
|
||||
"nonce": "id-token-oidc-nonce",
|
||||
"jti": "attacker-id-jti",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
oidc, _ := newAzureFollowupOIDC(t, jwks)
|
||||
session := authedSessionWithTokens(t, forgedAccessToken, forgedIDToken)
|
||||
|
||||
authenticated, _, _ := oidc.validateAzureTokens(session)
|
||||
assert.False(t, authenticated,
|
||||
"attacker's forged tokens must not authenticate even when the access token has a nonce header — ID token verification rejects the wrong-key signature")
|
||||
}
|
||||
@@ -478,11 +478,10 @@ func TestRefreshCoordinatorIntegration(t *testing.T) {
|
||||
|
||||
// Test 3: Rate limiting
|
||||
t.Run("RateLimiting", func(t *testing.T) {
|
||||
// Reset circuit breaker to closed state for this test
|
||||
coordinator.circuitBreaker.mutex.Lock()
|
||||
// Reset circuit breaker to closed state for this test. All fields are
|
||||
// atomic so we don't need any mutex.
|
||||
atomic.StoreInt32(&coordinator.circuitBreaker.state, 0) // closed
|
||||
atomic.StoreInt32(&coordinator.circuitBreaker.failures, 0)
|
||||
coordinator.circuitBreaker.mutex.Unlock()
|
||||
|
||||
// Temporarily increase circuit breaker threshold to not interfere
|
||||
oldMaxFailures := coordinator.circuitBreaker.config.MaxFailures
|
||||
@@ -525,9 +524,11 @@ func TestRefreshCoordinatorIntegration(t *testing.T) {
|
||||
time.Sleep(config.CleanupInterval * 3)
|
||||
|
||||
// Old sessions should be cleaned up
|
||||
coordinator.attemptsMutex.RLock()
|
||||
count := len(coordinator.sessionRefreshAttempts)
|
||||
coordinator.attemptsMutex.RUnlock()
|
||||
count := 0
|
||||
coordinator.sessionRefreshAttempts.Range(func(_, _ interface{}) bool {
|
||||
count++
|
||||
return true
|
||||
})
|
||||
|
||||
// Should have fewer sessions after cleanup
|
||||
if count > 10 {
|
||||
|
||||
+4
-4
@@ -415,8 +415,8 @@ func TestMiddlewareBackchannelLogoutRouting(t *testing.T) {
|
||||
clientID: "test-client",
|
||||
issuerURL: "https://provider.example.com",
|
||||
initComplete: make(chan struct{}),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
logoutURLPath: "/logout",
|
||||
}
|
||||
close(oidc.initComplete)
|
||||
@@ -457,8 +457,8 @@ func TestMiddlewareFrontchannelLogoutRouting(t *testing.T) {
|
||||
clientID: "test-client",
|
||||
issuerURL: "https://provider.example.com",
|
||||
initComplete: make(chan struct{}),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
logoutURLPath: "/logout",
|
||||
}
|
||||
close(oidc.initComplete)
|
||||
|
||||
@@ -89,6 +89,7 @@ var defaultExcludedURLs = map[string]struct{}{
|
||||
// - The configured TraefikOidc handler ready to process requests.
|
||||
// - An error if essential configuration is missing or invalid (e.g., short encryption key).
|
||||
func New(ctx context.Context, next http.Handler, config *Config, name string) (http.Handler, error) {
|
||||
sendTelemetry(pluginVersion)
|
||||
return NewWithContext(ctx, config, next, name)
|
||||
}
|
||||
|
||||
@@ -239,23 +240,63 @@ func NewWithContext(ctx context.Context, config *Config, next http.Handler, name
|
||||
}
|
||||
return 0
|
||||
}(),
|
||||
tokenCleanupStopChan: make(chan struct{}),
|
||||
metadataRefreshStopChan: make(chan struct{}),
|
||||
ctx: pluginCtx,
|
||||
cancelFunc: cancelFunc,
|
||||
suppressDiagnosticLogs: isTestMode(),
|
||||
securityHeadersApplier: config.GetSecurityHeadersApplier(),
|
||||
scopeFilter: NewScopeFilter(logger), // NEW - for discovery-based scope filtering
|
||||
dcrConfig: config.DynamicClientRegistration,
|
||||
allowPrivateIPAddresses: config.AllowPrivateIPAddresses,
|
||||
minimalHeaders: config.MinimalHeaders,
|
||||
stripAuthCookies: config.StripAuthCookies,
|
||||
enableBackchannelLogout: config.EnableBackchannelLogout,
|
||||
enableFrontchannelLogout: config.EnableFrontchannelLogout,
|
||||
backchannelLogoutPath: normalizeLogoutPath(config.BackchannelLogoutURL),
|
||||
frontchannelLogoutPath: normalizeLogoutPath(config.FrontchannelLogoutURL),
|
||||
sessionInvalidationCache: cacheManager.GetSharedSessionInvalidationCache(),
|
||||
refreshResultCache: cacheManager.GetSharedRefreshResultCache(),
|
||||
tokenCleanupStopChan: make(chan struct{}),
|
||||
metadataRefreshStopChan: make(chan struct{}),
|
||||
ctx: pluginCtx,
|
||||
cancelFunc: cancelFunc,
|
||||
suppressDiagnosticLogs: isTestMode(),
|
||||
securityHeadersApplier: config.GetSecurityHeadersApplier(),
|
||||
scopeFilter: NewScopeFilter(logger), // NEW - for discovery-based scope filtering
|
||||
dcrConfig: config.DynamicClientRegistration,
|
||||
allowPrivateIPAddresses: config.AllowPrivateIPAddresses,
|
||||
minimalHeaders: config.MinimalHeaders,
|
||||
stripAuthCookies: config.StripAuthCookies,
|
||||
enableBackchannelLogout: config.EnableBackchannelLogout,
|
||||
enableFrontchannelLogout: config.EnableFrontchannelLogout,
|
||||
backchannelLogoutPath: normalizeLogoutPath(config.BackchannelLogoutURL),
|
||||
frontchannelLogoutPath: normalizeLogoutPath(config.FrontchannelLogoutURL),
|
||||
sessionInvalidationCache: cacheManager.GetSharedSessionInvalidationCache(),
|
||||
refreshResultCache: cacheManager.GetSharedRefreshResultCache(),
|
||||
enableBearerAuth: config.EnableBearerAuth,
|
||||
stripAuthorizationHeader: config.StripAuthorizationHeader,
|
||||
bearerEmitWWWAuthenticate: config.BearerEmitWWWAuthenticate,
|
||||
bearerOverridesCookie: config.BearerOverridesCookie,
|
||||
bearerIdentifierClaim: func() string {
|
||||
if config.BearerIdentifierClaim != "" {
|
||||
return config.BearerIdentifierClaim
|
||||
}
|
||||
return "sub"
|
||||
}(),
|
||||
maxIdentifierLength: func() int {
|
||||
if config.MaxIdentifierLength > 0 {
|
||||
return config.MaxIdentifierLength
|
||||
}
|
||||
return 256
|
||||
}(),
|
||||
maxTokenAge: func() time.Duration {
|
||||
if config.MaxTokenAgeSeconds > 0 {
|
||||
return time.Duration(config.MaxTokenAgeSeconds) * time.Second
|
||||
}
|
||||
return 24 * time.Hour
|
||||
}(),
|
||||
bearerFailureThreshold: func() int {
|
||||
if config.BearerFailureThreshold > 0 {
|
||||
return config.BearerFailureThreshold
|
||||
}
|
||||
return 20
|
||||
}(),
|
||||
bearerFailureWindow: func() time.Duration {
|
||||
if config.BearerFailureWindowSeconds > 0 {
|
||||
return time.Duration(config.BearerFailureWindowSeconds) * time.Second
|
||||
}
|
||||
return 60 * time.Second
|
||||
}(),
|
||||
bearerFailurePenalty: func() time.Duration {
|
||||
if config.BearerFailurePenaltySeconds > 0 {
|
||||
return time.Duration(config.BearerFailurePenaltySeconds) * time.Second
|
||||
}
|
||||
return 60 * time.Second
|
||||
}(),
|
||||
}
|
||||
|
||||
// Log audience configuration
|
||||
@@ -265,6 +306,31 @@ func NewWithContext(ctx context.Context, config *Config, next http.Handler, name
|
||||
t.logger.Debugf("No custom audience specified, using clientID as audience: %s", t.clientID)
|
||||
}
|
||||
|
||||
// Bearer-auth startup validation. The bearer path is M2M-only and demands
|
||||
// a non-default audience so tokens issued for a different resource cannot
|
||||
// be replayed against this service. The BearerIdentifierClaim guard blocks
|
||||
// the `email` claim explicitly — without email_verified enforcement (out of
|
||||
// scope for M2M), trusting email is a spoofing vector for federated IdPs.
|
||||
// See spec §7.9 / §13.
|
||||
if config.EnableBearerAuth {
|
||||
if config.Audience == "" {
|
||||
cancelFunc()
|
||||
return nil, fmt.Errorf("EnableBearerAuth=true requires Audience to be set explicitly (cannot default to clientID — that path accepts ID tokens)")
|
||||
}
|
||||
if t.bearerIdentifierClaim == "email" {
|
||||
cancelFunc()
|
||||
return nil, fmt.Errorf("enableBearerAuth=true with bearerIdentifierClaim=%q is rejected: email-based identity without email_verified enforcement is a spoofing vector for federated IdPs (use \"sub\" or a custom claim; cookie-path userIdentifierClaim is unaffected)", t.bearerIdentifierClaim)
|
||||
}
|
||||
if !config.StrictAudienceValidation {
|
||||
t.logger.Infof("EnableBearerAuth=true with StrictAudienceValidation=false: recommend enabling strict audience validation for hardening")
|
||||
}
|
||||
t.bearerFailureTracker = newBearerFailureTracker(
|
||||
t.bearerFailureThreshold, t.bearerFailureWindow, t.bearerFailurePenalty,
|
||||
)
|
||||
t.logger.Infof("Bearer-token auth enabled: audience=%q identifierClaim=%q stripAuthz=%t bearerOverridesCookie=%t maxTokenAge=%s",
|
||||
config.Audience, t.bearerIdentifierClaim, t.stripAuthorizationHeader, t.bearerOverridesCookie, t.maxTokenAge)
|
||||
}
|
||||
|
||||
// Convert sessionMaxAge from seconds to duration (0 will use default 24 hours)
|
||||
sessionMaxAge := time.Duration(config.SessionMaxAge) * time.Second
|
||||
t.sessionManager, _ = NewSessionManager(config.SessionEncryptionKey, config.ForceHTTPS, config.CookieDomain, config.CookiePrefix, sessionMaxAge, t.logger) // Safe to ignore: session manager creation with fallback to defaults
|
||||
|
||||
+14
-30
@@ -8,6 +8,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -484,9 +485,8 @@ func TestFirstRequestHandling(t *testing.T) {
|
||||
defer server.Close()
|
||||
|
||||
oidc := &TraefikOidc{
|
||||
providerURL: server.URL,
|
||||
firstRequestReceived: false,
|
||||
firstRequestMutex: sync.Mutex{},
|
||||
providerURL: server.URL,
|
||||
firstRequestStarted: 0,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
},
|
||||
@@ -508,19 +508,13 @@ func TestFirstRequestHandling(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// Simulate first request processing
|
||||
oidc.firstRequestMutex.Lock()
|
||||
if !oidc.firstRequestReceived {
|
||||
oidc.firstRequestReceived = true
|
||||
oidc.firstRequestMutex.Unlock()
|
||||
|
||||
// Simulate first request processing — single-firing via CAS.
|
||||
if atomic.CompareAndSwapInt32(&oidc.firstRequestStarted, 0, 1) {
|
||||
// This would normally be called asynchronously
|
||||
go func() {
|
||||
oidc.initializeMetadata(server.URL)
|
||||
// initComplete is closed internally by initializeMetadata
|
||||
}()
|
||||
} else {
|
||||
oidc.firstRequestMutex.Unlock()
|
||||
}
|
||||
|
||||
// Wait for initialization
|
||||
@@ -556,9 +550,8 @@ func TestFirstRequestHandling(t *testing.T) {
|
||||
defer server.Close()
|
||||
|
||||
oidc := &TraefikOidc{
|
||||
providerURL: server.URL,
|
||||
firstRequestReceived: false,
|
||||
firstRequestMutex: sync.Mutex{},
|
||||
providerURL: server.URL,
|
||||
firstRequestStarted: 0,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
},
|
||||
@@ -580,31 +573,22 @@ func TestFirstRequestHandling(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// Simulate multiple concurrent "first" requests
|
||||
// Simulate multiple concurrent "first" requests — only one CAS winner
|
||||
// fires the bootstrap path.
|
||||
const numRequests = 10
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(numRequests)
|
||||
|
||||
initStarted := 0
|
||||
var initMu sync.Mutex
|
||||
var initStarted int32
|
||||
|
||||
for i := 0; i < numRequests; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
oidc.firstRequestMutex.Lock()
|
||||
if !oidc.firstRequestReceived {
|
||||
oidc.firstRequestReceived = true
|
||||
oidc.firstRequestMutex.Unlock()
|
||||
|
||||
initMu.Lock()
|
||||
initStarted++
|
||||
initMu.Unlock()
|
||||
|
||||
if atomic.CompareAndSwapInt32(&oidc.firstRequestStarted, 0, 1) {
|
||||
atomic.AddInt32(&initStarted, 1)
|
||||
// Only one should actually start initialization
|
||||
oidc.initializeMetadata(server.URL)
|
||||
} else {
|
||||
oidc.firstRequestMutex.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -612,8 +596,8 @@ func TestFirstRequestHandling(t *testing.T) {
|
||||
wg.Wait()
|
||||
|
||||
// Verify only one initialization was started
|
||||
if initStarted != 1 {
|
||||
t.Errorf("expected exactly 1 initialization, got %d", initStarted)
|
||||
if atomic.LoadInt32(&initStarted) != 1 {
|
||||
t.Errorf("expected exactly 1 initialization, got %d", atomic.LoadInt32(&initStarted))
|
||||
}
|
||||
|
||||
// The metadata endpoint might be called once or not at all depending on timing
|
||||
|
||||
+28
-28
@@ -61,8 +61,8 @@ func TestServeHTTP_ExcludedURLs(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com", // Required for initialization check
|
||||
}
|
||||
close(oidc.initComplete)
|
||||
@@ -92,8 +92,8 @@ func TestServeHTTP_EventStream(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
}
|
||||
close(oidc.initComplete)
|
||||
@@ -175,8 +175,8 @@ func TestServeHTTP_WebSocketUpgrade(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
}
|
||||
close(oidc.initComplete)
|
||||
@@ -272,8 +272,8 @@ func TestServeHTTP_InitializationTimeout(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}), // Never close this to simulate timeout
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("GET", "/protected", nil)
|
||||
@@ -307,8 +307,8 @@ func TestServeHTTP_InitializationTimeout(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -337,8 +337,8 @@ func TestServeHTTP_CallbackAndLogout(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -367,8 +367,8 @@ func TestServeHTTP_CallbackAndLogout(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -740,8 +740,8 @@ func TestMinimalHeaders(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
minimalHeaders: tt.minimalHeaders,
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
@@ -817,8 +817,8 @@ func TestMinimalHeaders_TokenHeaderNotSet(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
minimalHeaders: true, // Enable minimal headers
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
@@ -903,8 +903,8 @@ func TestStripAuthCookies(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
stripAuthCookies: tt.stripAuthCookies,
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
@@ -987,8 +987,8 @@ func TestStripAuthCookies_NoCookies(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
stripAuthCookies: true,
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
@@ -1034,8 +1034,8 @@ func TestStripAuthCookies_OnlyOIDCCookies(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
stripAuthCookies: true,
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
@@ -1085,8 +1085,8 @@ func TestStripAuthCookies_OnlyAppCookies(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
stripAuthCookies: true,
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
@@ -1148,8 +1148,8 @@ func TestStripAuthCookies_CustomPrefix(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sm,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
stripAuthCookies: true,
|
||||
extractClaimsFunc: func(token string) (map[string]interface{}, error) {
|
||||
|
||||
+4
-4
@@ -16,6 +16,7 @@ import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -2685,10 +2686,9 @@ func TestMetadataRecoveryOnProviderFailure(t *testing.T) {
|
||||
providerAvailable = true
|
||||
mu.Unlock()
|
||||
|
||||
// Reset the retry timer to allow immediate retry
|
||||
m.metadataRetryMutex.Lock()
|
||||
m.lastMetadataRetryTime = time.Time{} // Reset to zero time
|
||||
m.metadataRetryMutex.Unlock()
|
||||
// Reset the retry timer to allow immediate retry. The field is atomic
|
||||
// now, so no lock is needed.
|
||||
atomic.StoreInt64(&m.lastMetadataRetryNano, 0)
|
||||
|
||||
// Second request should trigger recovery attempt
|
||||
req2 := httptest.NewRequest("GET", "/protected", nil)
|
||||
|
||||
+153
-70
@@ -8,6 +8,7 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/lukaszraczylo/traefikoidc/internal/utils"
|
||||
@@ -145,19 +146,20 @@ func (t *TraefikOidc) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(req.URL.Path, "/health") {
|
||||
t.firstRequestMutex.Lock()
|
||||
if !t.firstRequestReceived {
|
||||
t.firstRequestReceived = true
|
||||
// Lock-free one-shot bootstrap. The previous firstRequestMutex.Lock()
|
||||
// fired on EVERY non-health request forever (even after the boolean
|
||||
// flipped true), which under Yaegi added a per-request serialization
|
||||
// point. CAS gives single-firing semantics with zero steady-state cost.
|
||||
if atomic.CompareAndSwapInt32(&t.firstRequestStarted, 0, 1) {
|
||||
t.logger.Debug("Starting background tasks on first request")
|
||||
t.startTokenCleanup()
|
||||
|
||||
if !t.metadataRefreshStarted && t.providerURL != "" {
|
||||
t.metadataRefreshStarted = true
|
||||
if t.providerURL != "" &&
|
||||
atomic.CompareAndSwapInt32(&t.metadataRefreshStartedAtomic, 0, 1) {
|
||||
// Metadata refresh is handled by singleton resource manager
|
||||
t.startMetadataRefresh(t.providerURL)
|
||||
}
|
||||
}
|
||||
t.firstRequestMutex.Unlock()
|
||||
}
|
||||
|
||||
// Evaluate auth-bypass once, before waiting for initialization. Excluded
|
||||
@@ -168,6 +170,14 @@ func (t *TraefikOidc) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
||||
// unauthenticated traffic would silently expose the backend.
|
||||
if bypass, reason := t.shouldBypassAuth(req); bypass {
|
||||
t.logger.Debugf("Bypassing OIDC for %s (%s)", req.URL.Path, reason)
|
||||
// When bearer auth is enabled, strip the Authorization header on
|
||||
// bypassed paths so a bearer token can't leak into health/metrics/
|
||||
// public endpoint logs via downstream services that don't expect it.
|
||||
// Excluded URLs are explicitly public; bearer is an artifact of the
|
||||
// API auth flow that doesn't belong on them.
|
||||
if t.enableBearerAuth {
|
||||
req.Header.Del("Authorization")
|
||||
}
|
||||
switch reason {
|
||||
case bypassReasonExcluded:
|
||||
// Operator-declared excluded URLs forward unconditionally.
|
||||
@@ -205,14 +215,14 @@ func (t *TraefikOidc) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
||||
t.metadataMu.RUnlock()
|
||||
|
||||
if issuerURL == "" {
|
||||
// Provider metadata initialization failed - try to recover
|
||||
// Retry every 30 seconds to allow automatic recovery when provider comes back online
|
||||
t.metadataRetryMutex.Lock()
|
||||
shouldRetry := time.Since(t.lastMetadataRetryTime) >= 30*time.Second
|
||||
if shouldRetry {
|
||||
t.lastMetadataRetryTime = time.Now()
|
||||
}
|
||||
t.metadataRetryMutex.Unlock()
|
||||
// Provider metadata initialization failed - try to recover.
|
||||
// Retry every 30 seconds to allow automatic recovery. Lock-free
|
||||
// throttle via CAS on lastMetadataRetryNano: one goroutine wins
|
||||
// the window, others see shouldRetry=false.
|
||||
nowNano := time.Now().UnixNano()
|
||||
last := atomic.LoadInt64(&t.lastMetadataRetryNano)
|
||||
shouldRetry := time.Duration(nowNano-last) >= 30*time.Second &&
|
||||
atomic.CompareAndSwapInt64(&t.lastMetadataRetryNano, last, nowNano)
|
||||
|
||||
if shouldRetry && t.providerURL != "" {
|
||||
t.logger.Info("Attempting to recover OIDC provider metadata...")
|
||||
@@ -236,6 +246,24 @@ func (t *TraefikOidc) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
||||
// Bypass checks already ran before the init wait; no need to repeat them.
|
||||
t.sessionManager.CleanupOldCookies(rw, req)
|
||||
|
||||
// Bearer-token auth (opt-in). Runs after init (we need issuer+JWKs+aud
|
||||
// available) and after bypass (excluded URLs always win). Cookie-vs-
|
||||
// bearer precedence is configurable; the safe default is cookie-wins.
|
||||
// See bearer_auth.go for the full pipeline.
|
||||
if t.enableBearerAuth {
|
||||
if _, hasBearer := detectBearerToken(req); hasBearer {
|
||||
cookiePresent := t.hasSessionCookie(req)
|
||||
if !cookiePresent || t.bearerOverridesCookie {
|
||||
if cookiePresent {
|
||||
t.logger.Infof("Both Authorization: Bearer and session cookie present on %s; bearer-wins per BearerOverridesCookie=true", req.URL.Path)
|
||||
}
|
||||
t.handleBearerRequest(rw, req)
|
||||
return
|
||||
}
|
||||
t.logger.Infof("Both Authorization: Bearer and session cookie present on %s; cookie-wins (default); bearer ignored", req.URL.Path)
|
||||
}
|
||||
}
|
||||
|
||||
session, err := t.sessionManager.GetSession(req)
|
||||
if err != nil {
|
||||
t.logger.Errorf("Error getting session: %v. Initiating authentication.", err)
|
||||
@@ -401,10 +429,17 @@ func (t *TraefikOidc) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
||||
t.defaultInitiateAuthentication(rw, req, session, redirectURL)
|
||||
}
|
||||
|
||||
// processAuthorizedRequest processes requests for authenticated users.
|
||||
// It extracts claims, validates roles/groups if configured, sets authentication headers,
|
||||
// processes header templates, and forwards the request to the next handler.
|
||||
// Domain checks should be performed before calling this method.
|
||||
// processAuthorizedRequest processes requests for authenticated cookie/session
|
||||
// users. It performs session-specific checks (identifier presence, backchannel-
|
||||
// logout invalidation, claims extraction with potential re-auth), persists
|
||||
// dirty session state, then delegates the post-auth pipeline (roles/groups,
|
||||
// header injection, security headers, cookie strip, forward) to
|
||||
// forwardAuthorized.
|
||||
//
|
||||
// The bearer-token path uses the same forwardAuthorized helper but takes a
|
||||
// different route to it (see bearer_auth.go). Keeping forwardAuthorized
|
||||
// session-agnostic is what lets the two auth methods share one pipeline.
|
||||
//
|
||||
// Parameters:
|
||||
// - rw: The HTTP response writer.
|
||||
// - req: The HTTP request to process.
|
||||
@@ -442,8 +477,7 @@ func (t *TraefikOidc) processAuthorizedRequest(rw http.ResponseWriter, req *http
|
||||
// the parsed claims keyed on the raw ID token, so concurrent dashboard
|
||||
// panel requests on the same session don't repeatedly base64-decode and
|
||||
// JSON-unmarshal the same JWT (a real cost under the yaegi interpreter
|
||||
// that hosts Traefik plugins). idClaims is reused below by the
|
||||
// header-templates branch.
|
||||
// that hosts Traefik plugins).
|
||||
idToken := session.GetIDToken()
|
||||
var (
|
||||
idClaims map[string]interface{}
|
||||
@@ -472,18 +506,76 @@ func (t *TraefikOidc) processAuthorizedRequest(rw http.ResponseWriter, req *http
|
||||
return
|
||||
}
|
||||
|
||||
var groups, roles []string
|
||||
if groupClaimsErr != nil && len(t.allowedRolesAndGroups) > 0 {
|
||||
// Claims couldn't be extracted but roles checks are required:
|
||||
// re-authenticate rather than 403 (session may be salvageable on
|
||||
// re-issue). Bearer path uses 401 for the equivalent failure.
|
||||
t.logger.Errorf("Failed to extract claims for roles/groups check: %v", groupClaimsErr)
|
||||
session.ResetRedirectCount()
|
||||
t.defaultInitiateAuthentication(rw, req, session, redirectURL)
|
||||
return
|
||||
}
|
||||
|
||||
if groupClaimsErr == nil && groupClaims != nil {
|
||||
var err error
|
||||
groups, roles, err = t.extractGroupsAndRolesFromClaims(groupClaims)
|
||||
if err != nil && len(t.allowedRolesAndGroups) > 0 {
|
||||
t.logger.Errorf("Failed to extract groups and roles: %v", err)
|
||||
session.ResetRedirectCount()
|
||||
t.defaultInitiateAuthentication(rw, req, session, redirectURL)
|
||||
// Persist any dirty session state BEFORE forwardAuthorized writes the
|
||||
// response. Once next.ServeHTTP fires, Set-Cookie can no longer reach
|
||||
// the client. The forwardAuthorized pipeline does not mutate session
|
||||
// state, so saving here is safe.
|
||||
if session.IsDirty() {
|
||||
if err := session.Save(req, rw); err != nil {
|
||||
t.logger.Errorf("Failed to save session after processing headers: %v", err)
|
||||
}
|
||||
} else {
|
||||
t.logger.Debug("Session not dirty, skipping save in processAuthorizedRequest")
|
||||
}
|
||||
|
||||
// Build the source-agnostic principal. ID-token claims drive header
|
||||
// templates and roles when present; otherwise fall back to access-token
|
||||
// claims (matches prior behavior for opaque-ID-token providers).
|
||||
p := &principal{
|
||||
Source: sourceSession,
|
||||
Identifier: userIdentifier,
|
||||
AccessToken: session.GetAccessToken(),
|
||||
IDToken: idToken,
|
||||
RefreshToken: session.GetRefreshToken(),
|
||||
Claims: groupClaims,
|
||||
}
|
||||
|
||||
t.forwardAuthorized(rw, req, p)
|
||||
}
|
||||
|
||||
// forwardAuthorized completes the post-authentication pipeline shared by the
|
||||
// cookie/session path and the bearer-token path. It performs:
|
||||
//
|
||||
// 1. Roles/groups extraction from p.Claims (idempotent; existing
|
||||
// extractGroupsAndRolesFromClaims helper).
|
||||
// 2. allowedRolesAndGroups gate — writes a 403 and returns if denied.
|
||||
// 3. Identity-header injection (X-Forwarded-User, X-User-Groups, X-User-Roles,
|
||||
// plus X-Auth-Request-* when !minimalHeaders).
|
||||
// 4. Operator-defined header templates.
|
||||
// 5. Security headers (delegated to t.securityHeadersApplier or fallback).
|
||||
// 6. OIDC session-cookie strip (stripAuthCookies).
|
||||
// 7. Authorization header strip on bearer source when stripAuthorizationHeader.
|
||||
// 8. next.ServeHTTP.
|
||||
//
|
||||
// Session persistence is the CALLER's responsibility — it must happen before
|
||||
// this function so Set-Cookie reaches the response.
|
||||
func (t *TraefikOidc) forwardAuthorized(rw http.ResponseWriter, req *http.Request, p *principal) {
|
||||
var (
|
||||
groups, roles []string
|
||||
extractErr error
|
||||
)
|
||||
if p.Claims != nil {
|
||||
groups, roles, extractErr = t.extractGroupsAndRolesFromClaims(p.Claims)
|
||||
if extractErr != nil && len(t.allowedRolesAndGroups) > 0 {
|
||||
// Bearer path: 403 (caller already verified the token; principal
|
||||
// claims are present but malformed for roles purposes).
|
||||
// Cookie path can't reach here because processAuthorizedRequest
|
||||
// catches groupClaimsErr earlier.
|
||||
t.logger.Errorf("Failed to extract groups and roles: %v", extractErr)
|
||||
t.sendErrorResponse(rw, req, "Access denied", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
if err == nil {
|
||||
if extractErr == nil {
|
||||
if len(groups) > 0 {
|
||||
req.Header.Set("X-User-Groups", strings.Join(groups, ","))
|
||||
}
|
||||
@@ -502,62 +594,46 @@ func (t *TraefikOidc) processAuthorizedRequest(rw http.ResponseWriter, req *http
|
||||
}
|
||||
}
|
||||
if !allowed {
|
||||
t.logger.Infof("User %s does not have any allowed roles or groups", userIdentifier)
|
||||
t.logger.Infof("User %s does not have any allowed roles or groups", p.Identifier)
|
||||
errorMsg := fmt.Sprintf("Access denied: You do not have any of the allowed roles or groups. To log out, visit: %s", t.logoutURLPath)
|
||||
t.sendErrorResponse(rw, req, errorMsg, http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
req.Header.Set("X-Forwarded-User", userIdentifier)
|
||||
req.Header.Set("X-Forwarded-User", p.Identifier)
|
||||
|
||||
// When minimalHeaders is enabled, skip extra headers to prevent 431 errors
|
||||
if !t.minimalHeaders {
|
||||
req.Header.Set("X-Auth-Request-Redirect", req.URL.RequestURI())
|
||||
req.Header.Set("X-Auth-Request-User", userIdentifier)
|
||||
if idToken != "" {
|
||||
req.Header.Set("X-Auth-Request-Token", idToken)
|
||||
req.Header.Set("X-Auth-Request-User", p.Identifier)
|
||||
if p.IDToken != "" {
|
||||
req.Header.Set("X-Auth-Request-Token", p.IDToken)
|
||||
}
|
||||
}
|
||||
|
||||
if len(t.headerTemplates) > 0 {
|
||||
if idClaimsErr != nil {
|
||||
t.logger.Errorf("Failed to extract claims from ID Token for template headers: %v", idClaimsErr)
|
||||
} else {
|
||||
// idClaims may be nil when no ID token is present; templates
|
||||
// referencing .Claims.* will simply produce empty values, which
|
||||
// matches the prior behavior.
|
||||
templateData := map[string]interface{}{
|
||||
"AccessToken": session.GetAccessToken(),
|
||||
"IDToken": idToken,
|
||||
"RefreshToken": session.GetRefreshToken(),
|
||||
"Claims": idClaims,
|
||||
}
|
||||
|
||||
for headerName, tmpl := range t.headerTemplates {
|
||||
var buf bytes.Buffer
|
||||
if err := tmpl.Execute(&buf, templateData); err != nil {
|
||||
t.logger.Errorf("Failed to execute template for header %s: %v", headerName, err)
|
||||
continue
|
||||
}
|
||||
headerValue := buf.String()
|
||||
req.Header.Set(headerName, headerValue)
|
||||
t.logger.Debugf("Set templated header %s = %s", headerName, headerValue)
|
||||
}
|
||||
// NOTE: templates only mutate request headers (not session state),
|
||||
// so we deliberately do NOT MarkDirty / Save here. Previously every
|
||||
// authenticated request with header templates re-encrypted and
|
||||
// rewrote all session cookies, which was a measurable CPU and
|
||||
// Set-Cookie tax on dashboards that poll many panels per second.
|
||||
// p.Claims may be nil (e.g. session without an ID token). Templates
|
||||
// referencing .Claims.* will simply produce empty values — matches
|
||||
// the prior behavior. Bearer-source principals always carry access-
|
||||
// token claims (post-verifyToken).
|
||||
templateData := map[string]interface{}{
|
||||
"AccessToken": p.AccessToken,
|
||||
"IDToken": p.IDToken,
|
||||
"RefreshToken": p.RefreshToken,
|
||||
"Claims": p.Claims,
|
||||
}
|
||||
}
|
||||
|
||||
if session.IsDirty() {
|
||||
if err := session.Save(req, rw); err != nil {
|
||||
t.logger.Errorf("Failed to save session after processing headers: %v", err)
|
||||
for headerName, tmpl := range t.headerTemplates {
|
||||
var buf bytes.Buffer
|
||||
if err := tmpl.Execute(&buf, templateData); err != nil {
|
||||
t.logger.Errorf("Failed to execute template for header %s: %v", headerName, err)
|
||||
continue
|
||||
}
|
||||
headerValue := buf.String()
|
||||
req.Header.Set(headerName, headerValue)
|
||||
t.logger.Debugf("Set templated header %s = %s", headerName, headerValue)
|
||||
}
|
||||
} else {
|
||||
t.logger.Debug("Session not dirty, skipping save in processAuthorizedRequest")
|
||||
}
|
||||
|
||||
// Apply security headers if configured
|
||||
@@ -573,7 +649,7 @@ func (t *TraefikOidc) processAuthorizedRequest(rw http.ResponseWriter, req *http
|
||||
|
||||
// Strip OIDC session cookies before forwarding to the backend to prevent
|
||||
// HTTP 431 "Request Header Fields Too Large" errors (GitHub issue #122).
|
||||
if t.stripAuthCookies {
|
||||
if t.stripAuthCookies && t.sessionManager != nil {
|
||||
prefix := t.sessionManager.GetCookiePrefix()
|
||||
filtered := make([]*http.Cookie, 0, len(req.Cookies()))
|
||||
for _, c := range req.Cookies() {
|
||||
@@ -587,7 +663,14 @@ func (t *TraefikOidc) processAuthorizedRequest(rw http.ResponseWriter, req *http
|
||||
}
|
||||
}
|
||||
|
||||
t.logger.Debugf("Request authorized for user %s, forwarding to next handler", userIdentifier)
|
||||
// Bearer source: strip the Authorization header to keep the raw token
|
||||
// out of downstream service logs. Off-by-config for operators who chain
|
||||
// services that each re-verify the bearer.
|
||||
if p.Source == sourceBearer && t.stripAuthorizationHeader {
|
||||
req.Header.Del("Authorization")
|
||||
}
|
||||
|
||||
t.logger.Debugf("Request authorized for user %s (source=%d), forwarding to next handler", p.Identifier, p.Source)
|
||||
|
||||
t.next.ServeHTTP(rw, req)
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@ func TestMiddlewareContextCancellation(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}), // Never close to simulate waiting
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
}
|
||||
|
||||
// Create request with canceled context
|
||||
@@ -39,8 +39,8 @@ func TestMiddlewareSessionErrorRecovery(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -73,8 +73,8 @@ func TestMiddlewareAJAXRequestHandling(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -102,8 +102,8 @@ func TestLogoutWorksWithoutOIDCInitialization(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}), // Never close to simulate provider unavailable
|
||||
sessionManager: createTestSessionManager(t),
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
logoutURLPath: "/logout",
|
||||
postLogoutRedirectURI: "/",
|
||||
forceHTTPS: false,
|
||||
@@ -142,8 +142,8 @@ func TestMiddlewareDomainRestrictions(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -187,8 +187,8 @@ func TestMiddlewareDomainRestrictions(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
@@ -236,8 +236,8 @@ func TestMiddlewareOpaqueTokenHandling(t *testing.T) {
|
||||
logger: NewLogger("debug"),
|
||||
initComplete: make(chan struct{}),
|
||||
sessionManager: sessionManager,
|
||||
firstRequestReceived: true,
|
||||
metadataRefreshStarted: true,
|
||||
firstRequestStarted: 1,
|
||||
metadataRefreshStartedAtomic: 1,
|
||||
issuerURL: "https://provider.example.com",
|
||||
redirURLPath: "/callback",
|
||||
logoutURLPath: "/logout",
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
// Package traefikoidc — principal abstraction for the shared post-auth
|
||||
// pipeline. A principal carries the resolved identity + tokens + claims
|
||||
// produced by EITHER the cookie session path or the bearer-token path, so
|
||||
// downstream header injection / roles checks / forwarding can be implemented
|
||||
// once and reused.
|
||||
package traefikoidc
|
||||
|
||||
// principalSource indicates which auth path produced a principal. Used by
|
||||
// forwardAuthorized to decide source-specific behavior (e.g. only strip the
|
||||
// Authorization header for bearer-source principals).
|
||||
type principalSource int
|
||||
|
||||
const (
|
||||
sourceSession principalSource = iota
|
||||
sourceBearer
|
||||
)
|
||||
|
||||
// principal is the immutable post-auth value passed to forwardAuthorized.
|
||||
// No methods mutate it; no manager pointer; no I/O. Pure data.
|
||||
type principal struct {
|
||||
Claims map[string]interface{}
|
||||
Identifier string
|
||||
Subject string
|
||||
ClientID string
|
||||
AccessToken string
|
||||
IDToken string
|
||||
RefreshToken string
|
||||
Source principalSource
|
||||
}
|
||||
|
||||
// buildPrincipalFromSession adapts an authenticated SessionData into a
|
||||
// principal value WITHOUT writing back to the session. This is the only
|
||||
// function that still knows about SessionData; the rest of the pipeline is
|
||||
// session-agnostic. Returns nil when the session has no usable identity.
|
||||
func (t *TraefikOidc) buildPrincipalFromSession(session *SessionData) *principal {
|
||||
if session == nil {
|
||||
return nil
|
||||
}
|
||||
identifier := session.GetUserIdentifier()
|
||||
if identifier == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var claims map[string]interface{}
|
||||
if idToken := session.GetIDToken(); idToken != "" && t.extractClaimsFunc != nil {
|
||||
// Best-effort: cached on the session, never blocking.
|
||||
claims, _ = session.GetIDTokenClaims(t.extractClaimsFunc) // Safe to ignore: claims-error path handled by header-template branch
|
||||
}
|
||||
|
||||
return &principal{
|
||||
Source: sourceSession,
|
||||
Identifier: identifier,
|
||||
AccessToken: session.GetAccessToken(),
|
||||
IDToken: session.GetIDToken(),
|
||||
RefreshToken: session.GetRefreshToken(),
|
||||
Claims: claims,
|
||||
}
|
||||
}
|
||||
+222
-166
@@ -15,17 +15,29 @@ import (
|
||||
// It implements request coalescing, rate limiting, and circuit breaking
|
||||
// specifically for token refresh operations.
|
||||
type RefreshCoordinator struct {
|
||||
inFlightRefreshes map[string]*refreshOperation
|
||||
// inFlightRefreshes maps tokenHash -> *refreshOperation. sync.Map is used
|
||||
// instead of a plain map + RWMutex so concurrent refreshes do not
|
||||
// serialize on a single global lock. Under Yaegi the previous
|
||||
// refreshMutex.Lock() was held for tens of milliseconds per request due
|
||||
// to interpreter overhead on the work inside the critical section,
|
||||
// causing dozens of goroutines to stack up on it and pin one CPU core.
|
||||
inFlightRefreshes sync.Map
|
||||
// sessionRefreshAttempts maps sessionID -> *refreshAttemptTracker.
|
||||
// sync.Map + atomic tracker fields means isInCooldown/recordRefreshAttempt/
|
||||
// recordRefreshSuccess/recordRefreshFailure are lock-free. Previously
|
||||
// these used attemptsMutex sync.RWMutex; under Yaegi every Lock() acquisition
|
||||
// adds 10-50ms of dispatch overhead, and they were called twice per leader
|
||||
// request (once for recordRefreshAttempt, once for isInCooldown). That
|
||||
// serializing pattern caused the v1.0.15 death spiral after v1.0.14
|
||||
// removed the refreshMutex (same architectural shape, different mutex).
|
||||
sessionRefreshAttempts sync.Map
|
||||
cleanupTimers map[string]*time.Timer
|
||||
sessionRefreshAttempts map[string]*refreshAttemptTracker
|
||||
circuitBreaker *RefreshCircuitBreaker
|
||||
metrics *RefreshMetrics
|
||||
logger *Logger
|
||||
stopChan chan struct{}
|
||||
config RefreshCoordinatorConfig
|
||||
wg sync.WaitGroup
|
||||
attemptsMutex sync.RWMutex
|
||||
refreshMutex sync.RWMutex
|
||||
cleanupTimerMu sync.Mutex
|
||||
}
|
||||
|
||||
@@ -84,14 +96,22 @@ type refreshResult struct {
|
||||
fromCache bool
|
||||
}
|
||||
|
||||
// refreshAttemptTracker tracks refresh attempts for a session
|
||||
// refreshAttemptTracker tracks refresh attempts for a session. All fields are
|
||||
// accessed via sync/atomic so isInCooldown/recordRefreshAttempt/Success/Failure
|
||||
// can run without holding any per-coordinator lock. Times are UnixNano so they
|
||||
// fit in an int64 and can be read with a single atomic.LoadInt64.
|
||||
//
|
||||
// cooldownEndNano == 0 means "not in cooldown". This sentinel replaces the
|
||||
// inCooldown bool that the previous implementation kept under attemptsMutex —
|
||||
// under Yaegi any per-request global mutex turns into a serializing bottleneck
|
||||
// (the v1.0.14 refreshMutex -> sync.Map fix removed only one such bottleneck;
|
||||
// attemptsMutex was the next one in the queue).
|
||||
type refreshAttemptTracker struct {
|
||||
lastAttemptTime time.Time
|
||||
windowStartTime time.Time
|
||||
cooldownEndTime time.Time
|
||||
attempts int32
|
||||
consecutiveFailures int32
|
||||
inCooldown bool
|
||||
lastAttemptNano int64 // atomic, UnixNano of last attempt
|
||||
windowStartNano int64 // atomic, UnixNano of attempt-window start
|
||||
cooldownEndNano int64 // atomic, UnixNano; 0 = not in cooldown
|
||||
attempts int32 // atomic
|
||||
consecutiveFailures int32 // atomic
|
||||
}
|
||||
|
||||
// RefreshMetrics tracks coordinator performance metrics
|
||||
@@ -106,14 +126,18 @@ type RefreshMetrics struct {
|
||||
currentInFlightRefreshes int32
|
||||
}
|
||||
|
||||
// RefreshCircuitBreaker implements a circuit breaker specifically for refresh operations
|
||||
// RefreshCircuitBreaker implements a circuit breaker specifically for refresh
|
||||
// operations. All mutable fields are atomic so AllowRequest/RecordSuccess/
|
||||
// RecordFailure run without any mutex. The previous sync.RWMutex.RLock() was
|
||||
// taken on every CoordinateRefresh — under Yaegi this added 10-50ms of
|
||||
// interpreter dispatch per call, which compounded with attemptsMutex to keep
|
||||
// the pod's single CPU core saturated.
|
||||
type RefreshCircuitBreaker struct {
|
||||
lastFailureTime time.Time
|
||||
lastSuccessTime time.Time
|
||||
lastFailureNano int64 // atomic, UnixNano of most recent failure
|
||||
lastSuccessNano int64 // atomic, UnixNano of most recent success
|
||||
config RefreshCircuitBreakerConfig
|
||||
mutex sync.RWMutex
|
||||
state int32
|
||||
failures int32
|
||||
state int32 // atomic: 0=closed, 1=open, 2=half-open
|
||||
failures int32 // atomic
|
||||
}
|
||||
|
||||
// RefreshCircuitBreakerConfig configures the refresh circuit breaker
|
||||
@@ -130,13 +154,13 @@ func NewRefreshCoordinator(config RefreshCoordinatorConfig, logger *Logger) *Ref
|
||||
}
|
||||
|
||||
rc := &RefreshCoordinator{
|
||||
inFlightRefreshes: make(map[string]*refreshOperation),
|
||||
sessionRefreshAttempts: make(map[string]*refreshAttemptTracker),
|
||||
config: config,
|
||||
metrics: &RefreshMetrics{},
|
||||
logger: logger,
|
||||
stopChan: make(chan struct{}),
|
||||
cleanupTimers: make(map[string]*time.Timer),
|
||||
// inFlightRefreshes and sessionRefreshAttempts are both sync.Map;
|
||||
// their zero values are ready to use.
|
||||
config: config,
|
||||
metrics: &RefreshMetrics{},
|
||||
logger: logger,
|
||||
stopChan: make(chan struct{}),
|
||||
cleanupTimers: make(map[string]*time.Timer),
|
||||
circuitBreaker: &RefreshCircuitBreaker{
|
||||
config: RefreshCircuitBreakerConfig{
|
||||
MaxFailures: 3,
|
||||
@@ -227,13 +251,28 @@ func (rc *RefreshCoordinator) getOrCreateOperation(
|
||||
tokenHash string,
|
||||
refreshToken string,
|
||||
) (*refreshOperation, bool, error) {
|
||||
rc.refreshMutex.Lock()
|
||||
defer rc.refreshMutex.Unlock()
|
||||
// Speculatively construct the operation we WOULD register if we win the
|
||||
// race. Allocating here keeps the LoadOrStore call below atomic and
|
||||
// avoids any global lock — under Yaegi the previous map+RWMutex design
|
||||
// held the write lock long enough (tens of ms per call) that concurrent
|
||||
// refreshes on the same coordinator serialized into a queue that grew
|
||||
// without bound. See struct comment on inFlightRefreshes.
|
||||
candidate := &refreshOperation{
|
||||
refreshToken: refreshToken,
|
||||
done: make(chan struct{}),
|
||||
startTime: time.Now(),
|
||||
waiterCount: 1,
|
||||
}
|
||||
|
||||
// Check for existing operation while holding the lock
|
||||
if existingOp, exists := rc.inFlightRefreshes[tokenHash]; exists {
|
||||
if existing, loaded := rc.inFlightRefreshes.LoadOrStore(tokenHash, candidate); loaded {
|
||||
existingOp, ok := existing.(*refreshOperation)
|
||||
if !ok {
|
||||
// Defensive: anything stored here is always *refreshOperation, but
|
||||
// keep the typed assert so a programming error elsewhere doesn't
|
||||
// surface as a confusing panic in an interpreter frame.
|
||||
return nil, false, fmt.Errorf("inFlightRefreshes corrupt: unexpected type %T", existing)
|
||||
}
|
||||
if existingOp.refreshToken == refreshToken {
|
||||
// Join existing operation
|
||||
atomic.AddInt32(&existingOp.waiterCount, 1)
|
||||
return existingOp, false, nil
|
||||
}
|
||||
@@ -241,41 +280,60 @@ func (rc *RefreshCoordinator) getOrCreateOperation(
|
||||
return nil, false, fmt.Errorf("refresh token mismatch")
|
||||
}
|
||||
|
||||
// No existing operation - check if we can create a new one
|
||||
// All checks happen while holding the lock to prevent races
|
||||
// We won the race and registered `candidate`. Apply gates now. If any
|
||||
// gate fails we must remove our entry from the map and signal failure
|
||||
// to any joiners that snuck in between LoadOrStore and now.
|
||||
if err := rc.applyLeaderGates(sessionID); err != nil {
|
||||
rc.failCandidate(tokenHash, candidate, err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
// Check and record refresh attempt for rate limiting
|
||||
// Reserve concurrent slot via CAS — without the old global lock we can
|
||||
// no longer rely on mutex-mediated check-then-increment. If we lose the
|
||||
// CAS race we retry; if the limit has since been reached we back out.
|
||||
for {
|
||||
current := atomic.LoadInt32(&rc.metrics.currentInFlightRefreshes)
|
||||
if int(current) >= rc.config.MaxConcurrentRefreshes {
|
||||
err := fmt.Errorf("maximum concurrent refresh operations reached")
|
||||
rc.failCandidate(tokenHash, candidate, err)
|
||||
return nil, false, err
|
||||
}
|
||||
if atomic.CompareAndSwapInt32(&rc.metrics.currentInFlightRefreshes, current, current+1) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return candidate, true, nil
|
||||
}
|
||||
|
||||
// applyLeaderGates runs the rate-limit, cooldown, and memory-pressure checks
|
||||
// that previously ran under the global refreshMutex. Only the leader (the
|
||||
// goroutine that just registered the operation) runs them; joiners share the
|
||||
// leader's outcome via operation.done.
|
||||
func (rc *RefreshCoordinator) applyLeaderGates(sessionID string) error {
|
||||
rc.recordRefreshAttempt(sessionID)
|
||||
if rc.isInCooldown(sessionID) {
|
||||
atomic.AddInt64(&rc.metrics.cooldownsTriggered, 1)
|
||||
return nil, false, fmt.Errorf("refresh attempts exceeded for session, in cooldown period")
|
||||
return fmt.Errorf("refresh attempts exceeded for session, in cooldown period")
|
||||
}
|
||||
|
||||
// Check memory pressure
|
||||
if rc.config.EnableMemoryPressureDetection && rc.isUnderMemoryPressure() {
|
||||
atomic.AddInt64(&rc.metrics.memoryPressureEvents, 1)
|
||||
return nil, false, fmt.Errorf("system under memory pressure, refresh denied")
|
||||
return fmt.Errorf("system under memory pressure, refresh denied")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check and reserve concurrent refresh slot atomically
|
||||
current := atomic.LoadInt32(&rc.metrics.currentInFlightRefreshes)
|
||||
if int(current) >= rc.config.MaxConcurrentRefreshes {
|
||||
return nil, false, fmt.Errorf("maximum concurrent refresh operations reached")
|
||||
}
|
||||
|
||||
// Reserve the slot - we're still holding the lock so this is safe
|
||||
atomic.AddInt32(&rc.metrics.currentInFlightRefreshes, 1)
|
||||
|
||||
// Create and register new operation
|
||||
operation := &refreshOperation{
|
||||
refreshToken: refreshToken,
|
||||
done: make(chan struct{}),
|
||||
startTime: time.Now(),
|
||||
waiterCount: 1,
|
||||
}
|
||||
rc.inFlightRefreshes[tokenHash] = operation
|
||||
|
||||
return operation, true, nil
|
||||
// failCandidate removes the leader's just-registered operation from the
|
||||
// in-flight map and signals the error to any joiners by recording the result
|
||||
// and closing the done channel. This keeps the (nil, false, err) return path
|
||||
// equivalent to the pre-sync.Map version: callers see the error directly,
|
||||
// joiners see it via operation.done.
|
||||
func (rc *RefreshCoordinator) failCandidate(tokenHash string, op *refreshOperation, err error) {
|
||||
rc.inFlightRefreshes.Delete(tokenHash)
|
||||
op.mutex.Lock()
|
||||
op.result = &refreshResult{err: err}
|
||||
op.mutex.Unlock()
|
||||
close(op.done)
|
||||
}
|
||||
|
||||
// executeRefreshAsync performs the actual refresh operation asynchronously
|
||||
@@ -367,100 +425,108 @@ func (rc *RefreshCoordinator) scheduleDelayedCleanup(tokenHash string) {
|
||||
|
||||
// performCleanup removes the operation from the in-flight map.
|
||||
// Idempotent: only decrements the in-flight counter if an entry was actually
|
||||
// removed. This guards against any future path accidentally calling cleanup
|
||||
// twice for the same tokenHash (which would corrupt the refresh budget).
|
||||
// removed. LoadAndDelete is atomic so any concurrent failCandidate or repeat
|
||||
// cleanup call will see exactly one removal — the budget cannot be corrupted
|
||||
// by double-decrement.
|
||||
func (rc *RefreshCoordinator) performCleanup(tokenHash string) {
|
||||
rc.refreshMutex.Lock()
|
||||
_, existed := rc.inFlightRefreshes[tokenHash]
|
||||
if existed {
|
||||
delete(rc.inFlightRefreshes, tokenHash)
|
||||
}
|
||||
rc.refreshMutex.Unlock()
|
||||
if existed {
|
||||
if _, existed := rc.inFlightRefreshes.LoadAndDelete(tokenHash); existed {
|
||||
atomic.AddInt32(&rc.metrics.currentInFlightRefreshes, -1)
|
||||
}
|
||||
}
|
||||
|
||||
// isInCooldown checks if a session is in cooldown after recording an attempt
|
||||
func (rc *RefreshCoordinator) isInCooldown(sessionID string) bool {
|
||||
rc.attemptsMutex.Lock()
|
||||
defer rc.attemptsMutex.Unlock()
|
||||
// getOrCreateTracker fetches the tracker for sessionID or atomically creates a
|
||||
// fresh one. The sync.Map.LoadOrStore semantics make this lock-free even under
|
||||
// concurrent first-touch races: at most one tracker per sessionID survives.
|
||||
//
|
||||
// trackerFromMapValue centralizes the type assertion so the lint-mandated
|
||||
// two-value form lives in one place; the stored type is always
|
||||
// *refreshAttemptTracker by construction.
|
||||
func trackerFromMapValue(v interface{}) *refreshAttemptTracker {
|
||||
t, _ := v.(*refreshAttemptTracker)
|
||||
return t
|
||||
}
|
||||
|
||||
tracker, exists := rc.sessionRefreshAttempts[sessionID]
|
||||
if !exists {
|
||||
func (rc *RefreshCoordinator) getOrCreateTracker(sessionID string) *refreshAttemptTracker {
|
||||
if v, ok := rc.sessionRefreshAttempts.Load(sessionID); ok {
|
||||
return trackerFromMapValue(v)
|
||||
}
|
||||
fresh := &refreshAttemptTracker{
|
||||
windowStartNano: time.Now().UnixNano(),
|
||||
}
|
||||
actual, _ := rc.sessionRefreshAttempts.LoadOrStore(sessionID, fresh)
|
||||
return trackerFromMapValue(actual)
|
||||
}
|
||||
|
||||
// isInCooldown checks if a session is in cooldown. Lock-free read with a
|
||||
// best-effort cooldown-reset CAS on the cooldownEndNano sentinel. If the
|
||||
// reset races with another goroutine we accept the loser's view (the winner's
|
||||
// reset still happens). The attempt-window expiry and limit-exceeded paths
|
||||
// are write-mostly but use atomic.StoreInt64/AddInt32 — never a held lock.
|
||||
func (rc *RefreshCoordinator) isInCooldown(sessionID string) bool {
|
||||
v, ok := rc.sessionRefreshAttempts.Load(sessionID)
|
||||
if !ok {
|
||||
return false // No tracker means first attempt, not in cooldown
|
||||
}
|
||||
|
||||
tracker := trackerFromMapValue(v)
|
||||
now := time.Now()
|
||||
nowNano := now.UnixNano()
|
||||
|
||||
// Check if already in cooldown
|
||||
if tracker.inCooldown {
|
||||
if now.After(tracker.cooldownEndTime) {
|
||||
// Cooldown expired, reset tracker
|
||||
tracker.inCooldown = false
|
||||
tracker.attempts = 1 // Already recorded one attempt
|
||||
tracker.consecutiveFailures = 0
|
||||
tracker.windowStartTime = now
|
||||
return false
|
||||
// Already in cooldown?
|
||||
if cooldownEnd := atomic.LoadInt64(&tracker.cooldownEndNano); cooldownEnd != 0 {
|
||||
if nowNano <= cooldownEnd {
|
||||
return true // still in cooldown
|
||||
}
|
||||
// Cooldown expired. Best-effort reset (a concurrent caller may also
|
||||
// reset; the result is equivalent — fresh window + one recorded
|
||||
// attempt — so the CAS race is benign).
|
||||
if atomic.CompareAndSwapInt64(&tracker.cooldownEndNano, cooldownEnd, 0) {
|
||||
atomic.StoreInt32(&tracker.attempts, 1)
|
||||
atomic.StoreInt32(&tracker.consecutiveFailures, 0)
|
||||
atomic.StoreInt64(&tracker.windowStartNano, nowNano)
|
||||
}
|
||||
return true // Still in cooldown
|
||||
}
|
||||
|
||||
// Check if window expired
|
||||
if now.Sub(tracker.windowStartTime) > rc.config.RefreshAttemptWindow {
|
||||
// Reset window
|
||||
tracker.attempts = 1 // Already recorded one attempt
|
||||
tracker.windowStartTime = now
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if just exceeded attempt limit
|
||||
if int(tracker.attempts) >= rc.config.MaxRefreshAttempts {
|
||||
// Enter cooldown now
|
||||
tracker.inCooldown = true
|
||||
tracker.cooldownEndTime = now.Add(rc.config.RefreshCooldownPeriod)
|
||||
rc.logger.Infof("Session %s entering refresh cooldown after %d attempts",
|
||||
sessionID, tracker.attempts)
|
||||
// Window expired?
|
||||
if windowStart := atomic.LoadInt64(&tracker.windowStartNano); time.Duration(nowNano-windowStart) > rc.config.RefreshAttemptWindow {
|
||||
atomic.StoreInt32(&tracker.attempts, 1)
|
||||
atomic.StoreInt64(&tracker.windowStartNano, nowNano)
|
||||
return false
|
||||
}
|
||||
|
||||
// Just exceeded attempt limit?
|
||||
if int(atomic.LoadInt32(&tracker.attempts)) >= rc.config.MaxRefreshAttempts {
|
||||
end := now.Add(rc.config.RefreshCooldownPeriod).UnixNano()
|
||||
// Only one CAS winner publishes the cooldown end + logs.
|
||||
if atomic.CompareAndSwapInt64(&tracker.cooldownEndNano, 0, end) {
|
||||
rc.logger.Infof("Session %s entering refresh cooldown after %d attempts",
|
||||
sessionID, atomic.LoadInt32(&tracker.attempts))
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// recordRefreshAttempt records a refresh attempt for rate limiting
|
||||
// recordRefreshAttempt records a refresh attempt for rate limiting. Lock-free:
|
||||
// LoadOrStore for the tracker, atomic counters/timestamps for fields.
|
||||
func (rc *RefreshCoordinator) recordRefreshAttempt(sessionID string) {
|
||||
rc.attemptsMutex.Lock()
|
||||
defer rc.attemptsMutex.Unlock()
|
||||
|
||||
tracker, exists := rc.sessionRefreshAttempts[sessionID]
|
||||
if !exists {
|
||||
tracker = &refreshAttemptTracker{
|
||||
windowStartTime: time.Now(),
|
||||
}
|
||||
rc.sessionRefreshAttempts[sessionID] = tracker
|
||||
}
|
||||
|
||||
tracker := rc.getOrCreateTracker(sessionID)
|
||||
atomic.AddInt32(&tracker.attempts, 1)
|
||||
tracker.lastAttemptTime = time.Now()
|
||||
atomic.StoreInt64(&tracker.lastAttemptNano, time.Now().UnixNano())
|
||||
}
|
||||
|
||||
// recordRefreshSuccess records a successful refresh
|
||||
// recordRefreshSuccess records a successful refresh. Lock-free.
|
||||
func (rc *RefreshCoordinator) recordRefreshSuccess(sessionID string) {
|
||||
rc.attemptsMutex.Lock()
|
||||
defer rc.attemptsMutex.Unlock()
|
||||
|
||||
if tracker, exists := rc.sessionRefreshAttempts[sessionID]; exists {
|
||||
tracker.consecutiveFailures = 0
|
||||
if v, ok := rc.sessionRefreshAttempts.Load(sessionID); ok {
|
||||
atomic.StoreInt32(&trackerFromMapValue(v).consecutiveFailures, 0)
|
||||
}
|
||||
}
|
||||
|
||||
// recordRefreshFailure records a failed refresh
|
||||
// recordRefreshFailure records a failed refresh. Lock-free.
|
||||
func (rc *RefreshCoordinator) recordRefreshFailure(sessionID string) {
|
||||
rc.attemptsMutex.Lock()
|
||||
defer rc.attemptsMutex.Unlock()
|
||||
|
||||
if tracker, exists := rc.sessionRefreshAttempts[sessionID]; exists {
|
||||
atomic.AddInt32(&tracker.consecutiveFailures, 1)
|
||||
if v, ok := rc.sessionRefreshAttempts.Load(sessionID); ok {
|
||||
atomic.AddInt32(&trackerFromMapValue(v).consecutiveFailures, 1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -512,20 +578,22 @@ func (rc *RefreshCoordinator) cleanupRoutine() {
|
||||
}
|
||||
}
|
||||
|
||||
// cleanupStaleEntries removes outdated tracking entries
|
||||
// cleanupStaleEntries removes outdated tracking entries. Lock-free iteration
|
||||
// via sync.Map.Range; safe to race with concurrent reads/writes.
|
||||
func (rc *RefreshCoordinator) cleanupStaleEntries() {
|
||||
now := time.Now()
|
||||
|
||||
rc.attemptsMutex.Lock()
|
||||
defer rc.attemptsMutex.Unlock()
|
||||
|
||||
// Clean up old session trackers
|
||||
for sessionID, tracker := range rc.sessionRefreshAttempts {
|
||||
// Remove trackers that haven't been used recently
|
||||
if now.Sub(tracker.lastAttemptTime) > 2*rc.config.RefreshAttemptWindow {
|
||||
delete(rc.sessionRefreshAttempts, sessionID)
|
||||
cutoff := time.Now().Add(-2 * rc.config.RefreshAttemptWindow).UnixNano()
|
||||
rc.sessionRefreshAttempts.Range(func(key, value interface{}) bool {
|
||||
tracker := trackerFromMapValue(value)
|
||||
if tracker == nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if atomic.LoadInt64(&tracker.lastAttemptNano) < cutoff {
|
||||
// Compare-and-delete to avoid evicting a tracker that was just
|
||||
// re-used by a concurrent caller. We compare by pointer identity.
|
||||
rc.sessionRefreshAttempts.CompareAndDelete(key, value)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// GetMetrics returns current coordinator metrics
|
||||
@@ -558,63 +626,51 @@ func (rc *RefreshCoordinator) Shutdown() {
|
||||
rc.wg.Wait()
|
||||
}
|
||||
|
||||
// AllowRequest checks if the circuit breaker allows a request
|
||||
// AllowRequest reports whether the circuit breaker allows a request. Lock-free.
|
||||
func (cb *RefreshCircuitBreaker) AllowRequest() bool {
|
||||
cb.mutex.RLock()
|
||||
defer cb.mutex.RUnlock()
|
||||
|
||||
state := atomic.LoadInt32(&cb.state)
|
||||
|
||||
switch state {
|
||||
case 0: // Closed
|
||||
switch atomic.LoadInt32(&cb.state) {
|
||||
case 0: // closed
|
||||
return true
|
||||
case 1: // Open
|
||||
if time.Since(cb.lastFailureTime) > cb.config.OpenDuration {
|
||||
// Try to transition to half-open
|
||||
case 1: // open
|
||||
lastFail := atomic.LoadInt64(&cb.lastFailureNano)
|
||||
if time.Duration(time.Now().UnixNano()-lastFail) > cb.config.OpenDuration {
|
||||
// Transition to half-open; first CAS winner gets the probe.
|
||||
if atomic.CompareAndSwapInt32(&cb.state, 1, 2) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case 2: // Half-open
|
||||
case 2: // half-open
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// RecordSuccess records a successful operation
|
||||
// RecordSuccess records a successful operation. Lock-free.
|
||||
func (cb *RefreshCircuitBreaker) RecordSuccess() {
|
||||
cb.mutex.Lock()
|
||||
defer cb.mutex.Unlock()
|
||||
|
||||
state := atomic.LoadInt32(&cb.state)
|
||||
if state == 2 { // Half-open
|
||||
// Close the circuit
|
||||
switch atomic.LoadInt32(&cb.state) {
|
||||
case 2: // half-open -> close
|
||||
atomic.StoreInt32(&cb.state, 0)
|
||||
atomic.StoreInt32(&cb.failures, 0)
|
||||
} else if state == 0 { // Closed
|
||||
// Reset failure count on success
|
||||
case 0: // closed
|
||||
atomic.StoreInt32(&cb.failures, 0)
|
||||
}
|
||||
cb.lastSuccessTime = time.Now()
|
||||
atomic.StoreInt64(&cb.lastSuccessNano, time.Now().UnixNano())
|
||||
}
|
||||
|
||||
// RecordFailure records a failed operation
|
||||
// RecordFailure records a failed operation. Lock-free.
|
||||
func (cb *RefreshCircuitBreaker) RecordFailure() {
|
||||
cb.mutex.Lock()
|
||||
defer cb.mutex.Unlock()
|
||||
|
||||
failures := atomic.AddInt32(&cb.failures, 1)
|
||||
cb.lastFailureTime = time.Now()
|
||||
atomic.StoreInt64(&cb.lastFailureNano, time.Now().UnixNano())
|
||||
|
||||
state := atomic.LoadInt32(&cb.state)
|
||||
|
||||
if state == 0 && int(failures) >= cb.config.MaxFailures {
|
||||
// Open the circuit
|
||||
atomic.StoreInt32(&cb.state, 1)
|
||||
} else if state == 2 {
|
||||
// Half-open failed, return to open
|
||||
switch atomic.LoadInt32(&cb.state) {
|
||||
case 0:
|
||||
if int(failures) >= cb.config.MaxFailures {
|
||||
atomic.StoreInt32(&cb.state, 1)
|
||||
}
|
||||
case 2:
|
||||
// Half-open probe failed -> back to open.
|
||||
atomic.StoreInt32(&cb.state, 1)
|
||||
}
|
||||
}
|
||||
|
||||
+16
-15
@@ -365,10 +365,12 @@ func TestMemoryLeakPrevention(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify cleanup is working
|
||||
coordinator.attemptsMutex.RLock()
|
||||
sessionCount := len(coordinator.sessionRefreshAttempts)
|
||||
coordinator.attemptsMutex.RUnlock()
|
||||
// Verify cleanup is working. sync.Map has no Len(); count via Range.
|
||||
sessionCount := 0
|
||||
coordinator.sessionRefreshAttempts.Range(func(_, _ interface{}) bool {
|
||||
sessionCount++
|
||||
return true
|
||||
})
|
||||
|
||||
// Should have cleaned up old sessions (only recent ones remain)
|
||||
if sessionCount > numWorkers*2 {
|
||||
@@ -650,24 +652,23 @@ func TestCleanupRoutine(t *testing.T) {
|
||||
coordinator.recordRefreshAttempt(fmt.Sprintf("session_%d", i))
|
||||
}
|
||||
|
||||
// Verify sessions exist
|
||||
coordinator.attemptsMutex.RLock()
|
||||
initialCount := len(coordinator.sessionRefreshAttempts)
|
||||
coordinator.attemptsMutex.RUnlock()
|
||||
countSessions := func() int {
|
||||
n := 0
|
||||
coordinator.sessionRefreshAttempts.Range(func(_, _ interface{}) bool {
|
||||
n++
|
||||
return true
|
||||
})
|
||||
return n
|
||||
}
|
||||
|
||||
if initialCount != 5 {
|
||||
if initialCount := countSessions(); initialCount != 5 {
|
||||
t.Errorf("Expected 5 sessions, got %d", initialCount)
|
||||
}
|
||||
|
||||
// Wait for cleanup to run (2x window + cleanup interval)
|
||||
time.Sleep(2*config.RefreshAttemptWindow + 2*config.CleanupInterval)
|
||||
|
||||
// Verify sessions were cleaned up
|
||||
coordinator.attemptsMutex.RLock()
|
||||
finalCount := len(coordinator.sessionRefreshAttempts)
|
||||
coordinator.attemptsMutex.RUnlock()
|
||||
|
||||
if finalCount != 0 {
|
||||
if finalCount := countSessions(); finalCount != 0 {
|
||||
t.Errorf("Expected 0 sessions after cleanup, got %d", finalCount)
|
||||
}
|
||||
}
|
||||
|
||||
+76
-17
@@ -63,23 +63,23 @@ type Config struct {
|
||||
// IdPs do not expose RT TTL on the wire, so this is intentionally a
|
||||
// conservative heuristic; tune to match your provider configuration.
|
||||
// Default 21600 (6h). Set to 0 to disable the check.
|
||||
MaxRefreshTokenAgeSeconds int `json:"maxRefreshTokenAgeSeconds"`
|
||||
SessionMaxAge int `json:"sessionMaxAge"`
|
||||
RateLimit int `json:"rateLimit"`
|
||||
OverrideScopes bool `json:"overrideScopes"`
|
||||
DisableReplayDetection bool `json:"disableReplayDetection,omitempty"`
|
||||
RequireTokenIntrospection bool `json:"requireTokenIntrospection,omitempty"`
|
||||
AllowOpaqueTokens bool `json:"allowOpaqueTokens,omitempty"`
|
||||
StrictAudienceValidation bool `json:"strictAudienceValidation,omitempty"`
|
||||
EnablePKCE bool `json:"enablePKCE"`
|
||||
ForceHTTPS bool `json:"forceHTTPS"`
|
||||
AllowPrivateIPAddresses bool `json:"allowPrivateIPAddresses,omitempty"`
|
||||
MinimalHeaders bool `json:"minimalHeaders,omitempty"`
|
||||
StripAuthCookies bool `json:"stripAuthCookies,omitempty"`
|
||||
EnableBackchannelLogout bool `json:"enableBackchannelLogout,omitempty"`
|
||||
EnableFrontchannelLogout bool `json:"enableFrontchannelLogout,omitempty"`
|
||||
BackchannelLogoutURL string `json:"backchannelLogoutURL,omitempty"`
|
||||
FrontchannelLogoutURL string `json:"frontchannelLogoutURL,omitempty"`
|
||||
MaxRefreshTokenAgeSeconds int `json:"maxRefreshTokenAgeSeconds"`
|
||||
SessionMaxAge int `json:"sessionMaxAge"`
|
||||
RateLimit int `json:"rateLimit"`
|
||||
OverrideScopes bool `json:"overrideScopes"`
|
||||
DisableReplayDetection bool `json:"disableReplayDetection,omitempty"`
|
||||
RequireTokenIntrospection bool `json:"requireTokenIntrospection,omitempty"`
|
||||
AllowOpaqueTokens bool `json:"allowOpaqueTokens,omitempty"`
|
||||
StrictAudienceValidation bool `json:"strictAudienceValidation,omitempty"`
|
||||
EnablePKCE bool `json:"enablePKCE"`
|
||||
ForceHTTPS bool `json:"forceHTTPS"`
|
||||
AllowPrivateIPAddresses bool `json:"allowPrivateIPAddresses,omitempty"`
|
||||
MinimalHeaders bool `json:"minimalHeaders,omitempty"`
|
||||
StripAuthCookies bool `json:"stripAuthCookies,omitempty"`
|
||||
EnableBackchannelLogout bool `json:"enableBackchannelLogout,omitempty"`
|
||||
EnableFrontchannelLogout bool `json:"enableFrontchannelLogout,omitempty"`
|
||||
BackchannelLogoutURL string `json:"backchannelLogoutURL,omitempty"`
|
||||
FrontchannelLogoutURL string `json:"frontchannelLogoutURL,omitempty"`
|
||||
// CACertPath is an optional filesystem path to a PEM-encoded CA bundle used
|
||||
// to verify the OIDC provider's TLS certificate. Use this when the provider
|
||||
// is signed by an internal/private CA that is not in the system trust store.
|
||||
@@ -125,6 +125,52 @@ type Config struct {
|
||||
// ClientAssertionAlg is the JWS signing algorithm. Defaults to RS256.
|
||||
// Supported: RS256/384/512, PS256/384/512, ES256/384/512.
|
||||
ClientAssertionAlg string `json:"clientAssertionAlg,omitempty"`
|
||||
|
||||
// --- Bearer-token auth (opt-in M2M path) ---
|
||||
|
||||
// EnableBearerAuth turns on the Authorization: Bearer <jwt> auth path.
|
||||
// Default false. When true, Audience MUST be set or startup fails. The
|
||||
// bearer path is M2M-only: it accepts validated access-token JWTs, rejects
|
||||
// ID tokens, and forwards principal headers downstream without creating a
|
||||
// cookie session. See docs/BEARER_AUTH.md for the threat model.
|
||||
EnableBearerAuth bool `json:"enableBearerAuth,omitempty"`
|
||||
// BearerIdentifierClaim names the JWT claim used as the principal identifier
|
||||
// on the bearer-token auth path. Default "sub". Decoupled from
|
||||
// UserIdentifierClaim (which defaults to "email" and drives the cookie path)
|
||||
// so M2M bearer flow never accidentally relies on an unverified email.
|
||||
BearerIdentifierClaim string `json:"bearerIdentifierClaim,omitempty"`
|
||||
// StripAuthorizationHeader removes the Authorization header from the
|
||||
// forwarded request after successful bearer auth, so downstream services
|
||||
// never see the raw token. Default true. Disable only when a downstream
|
||||
// explicitly needs to re-validate the bearer.
|
||||
StripAuthorizationHeader bool `json:"stripAuthorizationHeader,omitempty"`
|
||||
// BearerEmitWWWAuthenticate controls whether 401 responses on the bearer
|
||||
// path include a WWW-Authenticate: Bearer error="invalid_token" hint per
|
||||
// RFC 6750 §3. Default true. Disable to reduce reconnaissance signal.
|
||||
BearerEmitWWWAuthenticate bool `json:"bearerEmitWWWAuthenticate,omitempty"`
|
||||
// BearerOverridesCookie controls precedence when both Authorization:
|
||||
// Bearer and a session cookie are present. Default false: cookie wins
|
||||
// (safer against browser/extension/proxy bearer injection). Set true for
|
||||
// the bearer-wins convention used by AWS/GCP/Kubernetes API gateways.
|
||||
BearerOverridesCookie bool `json:"bearerOverridesCookie,omitempty"`
|
||||
// MaxTokenAgeSeconds caps how old (iat-based) a bearer token may be.
|
||||
// Default 86400 (24h). Bounds clock-manipulation tokens with implausibly
|
||||
// distant iat values.
|
||||
MaxTokenAgeSeconds int64 `json:"maxTokenAgeSeconds,omitempty"`
|
||||
// MaxIdentifierLength bounds the post-sanitisation length of the bearer
|
||||
// principal identifier (the value injected as X-Forwarded-User). Default
|
||||
// 256.
|
||||
MaxIdentifierLength int `json:"maxIdentifierLength,omitempty"`
|
||||
// BearerFailureThreshold is the number of consecutive 401s from one
|
||||
// source IP within BearerFailureWindowSeconds that trips the throttle.
|
||||
// Default 20.
|
||||
BearerFailureThreshold int `json:"bearerFailureThreshold,omitempty"`
|
||||
// BearerFailureWindowSeconds is the rolling window (seconds) over which
|
||||
// 401s are counted for throttling. Default 60.
|
||||
BearerFailureWindowSeconds int `json:"bearerFailureWindowSeconds,omitempty"`
|
||||
// BearerFailurePenaltySeconds is how long an IP is parked in the 429
|
||||
// penalty box after BearerFailureThreshold is exceeded. Default 60.
|
||||
BearerFailurePenaltySeconds int `json:"bearerFailurePenaltySeconds,omitempty"`
|
||||
}
|
||||
|
||||
// loadCACertPool assembles an x509.CertPool from CACertPath and CACertPEM.
|
||||
@@ -291,6 +337,19 @@ func CreateConfig() *Config {
|
||||
MaxRefreshTokenAgeSeconds: 21600, // 6h - conservative heuristic, see field doc
|
||||
SecurityHeaders: createDefaultSecurityConfig(),
|
||||
Redis: nil, // Redis is disabled by default, configure via Traefik or env vars
|
||||
|
||||
// Bearer-auth defaults. EnableBearerAuth=false leaves the feature
|
||||
// dormant; the rest are values that apply only when bearer is enabled.
|
||||
EnableBearerAuth: false,
|
||||
BearerIdentifierClaim: "sub",
|
||||
StripAuthorizationHeader: true,
|
||||
BearerEmitWWWAuthenticate: true,
|
||||
BearerOverridesCookie: false,
|
||||
MaxTokenAgeSeconds: 86400,
|
||||
MaxIdentifierLength: 256,
|
||||
BearerFailureThreshold: 20,
|
||||
BearerFailureWindowSeconds: 60,
|
||||
BearerFailurePenaltySeconds: 60,
|
||||
}
|
||||
|
||||
return c
|
||||
|
||||
+142
@@ -0,0 +1,142 @@
|
||||
package traefikoidc
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// pluginVersion is bumped manually on each release. Keep in sync with the
|
||||
// most recent git tag (see `git tag --sort=-v:refname | head -1`).
|
||||
const pluginVersion = "1.0.11"
|
||||
|
||||
const (
|
||||
telemetryProject = "traefikoidc"
|
||||
telemetryTimeout = 2 * time.Second
|
||||
)
|
||||
|
||||
// telemetryEndpoint is intentionally a var rather than a const so the test
|
||||
// suite in this package can retarget it at an httptest server. Production
|
||||
// code never mutates it.
|
||||
var telemetryEndpoint = "https://oss.raczylo.com/v1/ping"
|
||||
|
||||
// telemetryOnce guarantees a single anonymous "plugin loaded" ping per
|
||||
// process lifetime. Traefik can instantiate a middleware many times per
|
||||
// process (one per route using the plugin); the sync.Once gate keeps the
|
||||
// fire-and-forget call from amplifying into many pings.
|
||||
//
|
||||
// Reset in tests via `telemetryOnce = sync.Once{}`.
|
||||
var telemetryOnce sync.Once
|
||||
|
||||
// telemetryInflight tracks any background goroutine started by sendTelemetry.
|
||||
// Tests Wait on it to drain in-flight goroutines before mutating package
|
||||
// state. Production code never calls Wait — the goroutine is fire-and-forget.
|
||||
var telemetryInflight sync.WaitGroup
|
||||
|
||||
// sendTelemetry fires one anonymous usage ping in the background. It is
|
||||
// failproof by contract:
|
||||
//
|
||||
// - never blocks the caller
|
||||
// - never panics (the goroutine recovers internally)
|
||||
// - never returns errors
|
||||
// - silently dropped on invalid input, env-driven opt-out, or network failure
|
||||
//
|
||||
// Opt-out is honored via any of:
|
||||
//
|
||||
// - DO_NOT_TRACK=1
|
||||
// - OSS_TELEMETRY_DISABLED=1
|
||||
// - TRAEFIKOIDC_DISABLE_TELEMETRY=1
|
||||
//
|
||||
// Yaegi note: this file deliberately avoids generics (atomic.Pointer[T]) and
|
||||
// range-over-int (Go 1.22) so it interprets under any reasonably recent
|
||||
// Traefik yaegi runtime.
|
||||
func sendTelemetry(version string) {
|
||||
telemetryOnce.Do(func() {
|
||||
if telemetryDisabledByEnv() {
|
||||
return
|
||||
}
|
||||
if !validTelemetryVersion(version) {
|
||||
return
|
||||
}
|
||||
telemetryInflight.Add(1)
|
||||
go func() {
|
||||
defer telemetryInflight.Done()
|
||||
defer func() { _ = recover() }()
|
||||
doTelemetryPost(version)
|
||||
}()
|
||||
})
|
||||
}
|
||||
|
||||
func telemetryDisabledByEnv() bool {
|
||||
keys := []string{
|
||||
"DO_NOT_TRACK",
|
||||
"OSS_TELEMETRY_DISABLED",
|
||||
"TRAEFIKOIDC_DISABLE_TELEMETRY",
|
||||
}
|
||||
for _, k := range keys {
|
||||
v := strings.ToLower(strings.TrimSpace(os.Getenv(k)))
|
||||
if v == "1" || v == "true" || v == "yes" || v == "on" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// validTelemetryVersion mirrors the server-side regex ^[A-Za-z0-9.+_-]{1,32}$
|
||||
// using a byte loop. No allocation, no regexp dependency.
|
||||
//
|
||||
// Yaegi note: written as an `||` chain rather than `switch{case A,B,C:}` —
|
||||
// some yaegi releases mis-evaluate comma-separated case expressions in
|
||||
// switch-true blocks, returning false for all inputs.
|
||||
func validTelemetryVersion(v string) bool {
|
||||
if len(v) == 0 || len(v) > 32 {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(v); i++ {
|
||||
c := v[i]
|
||||
ok := (c >= 'A' && c <= 'Z') ||
|
||||
(c >= 'a' && c <= 'z') ||
|
||||
(c >= '0' && c <= '9') ||
|
||||
c == '.' || c == '+' || c == '_' || c == '-'
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// doTelemetryPost builds the JSON body manually. The project name is a
|
||||
// constant and the version is pre-validated against an ASCII-only allowlist,
|
||||
// so direct concatenation needs no JSON escaping.
|
||||
func doTelemetryPost(version string) {
|
||||
body := make([]byte, 0, 96)
|
||||
body = append(body, `{"project":"`...)
|
||||
body = append(body, telemetryProject...)
|
||||
body = append(body, `","version":"`...)
|
||||
body = append(body, version...)
|
||||
body = append(body, `","ts":`...)
|
||||
body = strconv.AppendInt(body, time.Now().Unix(), 10)
|
||||
body = append(body, '}')
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), telemetryTimeout)
|
||||
defer cancel()
|
||||
|
||||
url := telemetryEndpoint
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
client := &http.Client{Timeout: telemetryTimeout}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
}
|
||||
@@ -0,0 +1,167 @@
|
||||
package traefikoidc
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// resetTelemetryState restores package-level mutable state so tests do not
|
||||
// contaminate one another. The cleanup waits for any in-flight ping goroutine
|
||||
// to finish before restoring telemetryEndpoint — without that drain step the
|
||||
// goroutine and the cleanup would race on the var.
|
||||
func resetTelemetryState(t *testing.T) {
|
||||
t.Helper()
|
||||
telemetryOnce = sync.Once{}
|
||||
prev := telemetryEndpoint
|
||||
t.Cleanup(func() {
|
||||
telemetryInflight.Wait()
|
||||
telemetryEndpoint = prev
|
||||
telemetryOnce = sync.Once{}
|
||||
})
|
||||
}
|
||||
|
||||
func newTelemetryServer(t *testing.T, status int) (hits *int32, lastBody func() string) {
|
||||
t.Helper()
|
||||
var counter int32
|
||||
var mu sync.Mutex
|
||||
var body string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
atomic.AddInt32(&counter, 1)
|
||||
b, _ := io.ReadAll(r.Body)
|
||||
_ = r.Body.Close()
|
||||
mu.Lock()
|
||||
body = string(b)
|
||||
mu.Unlock()
|
||||
w.WriteHeader(status)
|
||||
}))
|
||||
telemetryEndpoint = srv.URL
|
||||
t.Cleanup(srv.Close)
|
||||
return &counter, func() string {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
return body
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidTelemetryVersion(t *testing.T) {
|
||||
good := []string{"1.2.3", "1.4.0-beta1", "2.0", "v1.0.0", "1.0.0+meta", "dev"}
|
||||
for _, v := range good {
|
||||
if !validTelemetryVersion(v) {
|
||||
t.Errorf("validTelemetryVersion(%q) = false, want true", v)
|
||||
}
|
||||
}
|
||||
bad := []string{"", "has space", "semi;colon", strings.Repeat("1", 33)}
|
||||
for _, v := range bad {
|
||||
if validTelemetryVersion(v) {
|
||||
t.Errorf("validTelemetryVersion(%q) = true, want false", v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTelemetryDisabledByEnv(t *testing.T) {
|
||||
for _, k := range []string{"DO_NOT_TRACK", "OSS_TELEMETRY_DISABLED", "TRAEFIKOIDC_DISABLE_TELEMETRY"} {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
t.Setenv(k, "1")
|
||||
if !telemetryDisabledByEnv() {
|
||||
t.Fatalf("%s=1 should disable", k)
|
||||
}
|
||||
})
|
||||
}
|
||||
t.Run("falsy_values_do_not_disable", func(t *testing.T) {
|
||||
t.Setenv("DO_NOT_TRACK", "0")
|
||||
t.Setenv("OSS_TELEMETRY_DISABLED", "false")
|
||||
t.Setenv("TRAEFIKOIDC_DISABLE_TELEMETRY", "no")
|
||||
if telemetryDisabledByEnv() {
|
||||
t.Fatal("falsy env values should not disable")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSendTelemetry_FiresOnceAcrossManyCalls(t *testing.T) {
|
||||
resetTelemetryState(t)
|
||||
hits, lastBody := newTelemetryServer(t, http.StatusNoContent)
|
||||
|
||||
for i := 0; i < 50; i++ {
|
||||
sendTelemetry("1.2.3")
|
||||
}
|
||||
telemetryInflight.Wait()
|
||||
|
||||
if got := atomic.LoadInt32(hits); got != 1 {
|
||||
t.Fatalf("expected exactly 1 hit, got %d", got)
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Project string `json:"project"`
|
||||
Version string `json:"version"`
|
||||
Ts int64 `json:"ts"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(lastBody()), &payload); err != nil {
|
||||
t.Fatalf("server received non-JSON body: %q (err: %v)", lastBody(), err)
|
||||
}
|
||||
if payload.Project != "traefikoidc" || payload.Version != "1.2.3" || payload.Ts <= 0 {
|
||||
t.Fatalf("unexpected payload: %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendTelemetry_RespectsDisableEnv(t *testing.T) {
|
||||
resetTelemetryState(t)
|
||||
hits, _ := newTelemetryServer(t, http.StatusNoContent)
|
||||
t.Setenv("DO_NOT_TRACK", "1")
|
||||
|
||||
sendTelemetry("1.2.3")
|
||||
telemetryInflight.Wait()
|
||||
|
||||
if got := atomic.LoadInt32(hits); got != 0 {
|
||||
t.Fatalf("DO_NOT_TRACK should suppress; got %d hits", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendTelemetry_DropsInvalidVersion(t *testing.T) {
|
||||
resetTelemetryState(t)
|
||||
hits, _ := newTelemetryServer(t, http.StatusNoContent)
|
||||
|
||||
sendTelemetry("has space")
|
||||
telemetryInflight.Wait()
|
||||
|
||||
if got := atomic.LoadInt32(hits); got != 0 {
|
||||
t.Fatalf("invalid version should suppress; got %d hits", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendTelemetry_DoesNotBlock(t *testing.T) {
|
||||
resetTelemetryState(t)
|
||||
// Hanging server proves the caller is never blocked. The 2s context
|
||||
// timeout in doTelemetryPost ensures the goroutine eventually exits;
|
||||
// resetTelemetryState's cleanup waits for that drain before restoring
|
||||
// telemetryEndpoint so there is no race with this test's mutation.
|
||||
hung := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {
|
||||
time.Sleep(5 * time.Second)
|
||||
}))
|
||||
t.Cleanup(hung.Close)
|
||||
telemetryEndpoint = hung.URL
|
||||
|
||||
start := time.Now()
|
||||
sendTelemetry("1.2.3")
|
||||
if elapsed := time.Since(start); elapsed > 50*time.Millisecond {
|
||||
t.Fatalf("sendTelemetry blocked for %v, expected near-instant return", elapsed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendTelemetry_SurvivesServerError(t *testing.T) {
|
||||
resetTelemetryState(t)
|
||||
hits, _ := newTelemetryServer(t, http.StatusInternalServerError)
|
||||
|
||||
sendTelemetry("1.2.3")
|
||||
telemetryInflight.Wait()
|
||||
|
||||
if got := atomic.LoadInt32(hits); got != 1 {
|
||||
t.Fatalf("request should still reach server even on 500; got %d hits", got)
|
||||
}
|
||||
}
|
||||
+89
-4
@@ -29,6 +29,29 @@ import (
|
||||
//
|
||||
//nolint:gocognit,gocyclo // Complex token verification logic requires multiple security checks
|
||||
func (t *TraefikOidc) VerifyToken(token string) error {
|
||||
return t.verifyTokenWithOpts(token, verifyOpts{})
|
||||
}
|
||||
|
||||
// verifyOpts are internal-only knobs for verifyTokenWithOpts. Kept unexported
|
||||
// because they expose subtle replay-protection semantics that are dangerous
|
||||
// to misuse.
|
||||
type verifyOpts struct {
|
||||
// skipReplayMarking suppresses the JTI -> blacklist Set near the bottom
|
||||
// of verifyTokenWithOpts. The Get at the top remains active, so revoked
|
||||
// tokens (added to the blacklist by RevokeToken) are still rejected.
|
||||
// Used exclusively by the bearer-auth path, where bearer tokens are
|
||||
// designed to be reused until exp.
|
||||
skipReplayMarking bool
|
||||
}
|
||||
|
||||
// verifyTokenWithOpts runs the full token verification pipeline used by both
|
||||
// the cookie path and the bearer path. The cookie path uses the zero-value
|
||||
// opts; the bearer path sets skipReplayMarking=true. See the security spec
|
||||
// (docs/superpowers/specs/2026-05-18-bearer-token-auth-design.md §7.7) for
|
||||
// the exact contract: skipReplayMarking gates ONLY the JTI Set, never the Get.
|
||||
//
|
||||
//nolint:gocognit,gocyclo // Complex token verification logic requires multiple security checks
|
||||
func (t *TraefikOidc) verifyTokenWithOpts(token string, opts verifyOpts) error {
|
||||
if token == "" {
|
||||
return fmt.Errorf("invalid JWT format: token is empty")
|
||||
}
|
||||
@@ -76,7 +99,9 @@ func (t *TraefikOidc) VerifyToken(token string) error {
|
||||
}
|
||||
|
||||
// Only check JTI blacklist for tokens that aren't already in the cache
|
||||
// This is for FIRST-TIME validation to detect replay attacks
|
||||
// This is for FIRST-TIME validation to detect replay attacks. The
|
||||
// blacklist Get is ALWAYS active on the bearer path too — only the
|
||||
// Set below is gated by opts.skipReplayMarking.
|
||||
if jti, ok := parsedJWT.Claims["jti"].(string); ok && jti != "" {
|
||||
// Skip JTI blacklist check if replay detection is disabled
|
||||
if !t.disableReplayDetection {
|
||||
@@ -105,8 +130,12 @@ func (t *TraefikOidc) VerifyToken(token string) error {
|
||||
|
||||
t.cacheVerifiedToken(token, jwt.Claims)
|
||||
|
||||
if jti, ok := jwt.Claims["jti"].(string); ok && jti != "" && !t.disableReplayDetection {
|
||||
// Only add to blacklist if replay detection is enabled
|
||||
// Replay marking: add JTI to blacklist so subsequent presentations of
|
||||
// the SAME token can short-circuit via cache. Bearer path suppresses
|
||||
// this Set (opts.skipReplayMarking=true) because bearer tokens are
|
||||
// designed for reuse until exp; the cache-evict-then-replay scenario
|
||||
// would otherwise trigger false replay detection.
|
||||
if jti, ok := jwt.Claims["jti"].(string); ok && jti != "" && !t.disableReplayDetection && !opts.skipReplayMarking {
|
||||
expiry := time.Now().Add(defaultBlacklistDuration)
|
||||
if expClaim, expOk := jwt.Claims["exp"].(float64); expOk {
|
||||
expTime := time.Unix(int64(expClaim), 0)
|
||||
@@ -341,7 +370,17 @@ func (t *TraefikOidc) VerifyJWTSignatureAndClaims(jwt *JWT, token string) error
|
||||
|
||||
if err := verifySignatureWithKey(token, pubKey, alg); err != nil {
|
||||
if !t.suppressDiagnosticLogs {
|
||||
t.safeLogErrorf("DIAGNOSTIC: Signature verification failed for kid=%s, alg=%s: %v", kid, alg, err)
|
||||
// Microsoft Graph access tokens carry a `nonce` JWT header and are
|
||||
// signed in a proprietary form Microsoft documents as unverifiable
|
||||
// by client applications. They reach this path only when the
|
||||
// per-provider classifier (validateAzureTokens) didn't catch them,
|
||||
// so log at debug to keep the error stream actionable while still
|
||||
// surfacing the cause for diagnostics.
|
||||
if _, isMSProprietary := jwt.Header["nonce"]; isMSProprietary {
|
||||
t.safeLogDebugf("DIAGNOSTIC: Signature verification failed for kid=%s, alg=%s (Microsoft proprietary nonce header — token is opaque to clients): %v", kid, alg, err)
|
||||
} else {
|
||||
t.safeLogErrorf("DIAGNOSTIC: Signature verification failed for kid=%s, alg=%s: %v", kid, alg, err)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("signature verification failed: %w", err)
|
||||
}
|
||||
@@ -785,6 +824,27 @@ func (t *TraefikOidc) isGoogleProvider() bool {
|
||||
return strings.Contains(issuerURL, "google") || strings.Contains(issuerURL, "accounts.google.com")
|
||||
}
|
||||
|
||||
// isUnverifiableAzureAccessToken reports whether a JWT-shaped access token
|
||||
// matches the Microsoft proprietary format that client applications must not
|
||||
// validate. Microsoft injects a `nonce` value into the JWT header, signs over
|
||||
// the SHA256 hash of that nonce, and ships the original nonce on the wire,
|
||||
// guaranteeing that any standard JWS verifier rejects the signature. This is
|
||||
// the documented mechanism that keeps access tokens opaque to non-resource
|
||||
// holders (Microsoft Graph, Azure Management API).
|
||||
//
|
||||
// https://learn.microsoft.com/en-us/entra/identity-platform/access-tokens
|
||||
//
|
||||
// Returns true on parse failure as well — a token we cannot parse should not
|
||||
// be passed through the verification path that emits ERROR logs.
|
||||
func (t *TraefikOidc) isUnverifiableAzureAccessToken(token string) bool {
|
||||
parsed, err := parseJWT(token)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
_, hasProprietaryNonce := parsed.Header["nonce"]
|
||||
return hasProprietaryNonce
|
||||
}
|
||||
|
||||
// isAzureProvider detects if the configured OIDC provider is Azure AD.
|
||||
// It checks the issuer URL for Microsoft Azure AD domains.
|
||||
// Returns:
|
||||
@@ -827,6 +887,31 @@ func (t *TraefikOidc) validateAzureTokens(session *SessionData) (bool, bool, boo
|
||||
|
||||
if accessToken != "" {
|
||||
if strings.Count(accessToken, ".") == 2 {
|
||||
// Microsoft documents that client apps cannot validate access
|
||||
// tokens issued for Microsoft-owned APIs (Graph, Azure Mgmt) due
|
||||
// to their proprietary signing format (nonce in JWT header is
|
||||
// the marker — signed bytes hash the nonce, wire bytes ship the
|
||||
// raw value, so rsa verification always fails). Treat such
|
||||
// tokens as opaque, matching Microsoft's guidance and avoiding
|
||||
// per-request signature-error log spam (issue #134 followup).
|
||||
//
|
||||
// https://learn.microsoft.com/en-us/entra/identity-platform/access-tokens
|
||||
// "you can't validate tokens for Microsoft Graph according to
|
||||
// these rules due to their proprietary format"
|
||||
if t.isUnverifiableAzureAccessToken(accessToken) {
|
||||
t.logger.Debug("Azure access token is Microsoft-proprietary (Graph/Mgmt) — treating as opaque per Microsoft guidance")
|
||||
if idToken != "" {
|
||||
if err := t.verifyToken(idToken); err != nil {
|
||||
t.logger.Debugf("Azure: ID token validation failed while access token was opaque: %v", err)
|
||||
if session.GetRefreshToken() != "" {
|
||||
return false, true, false
|
||||
}
|
||||
return false, false, true
|
||||
}
|
||||
return t.validateTokenExpiry(session, idToken)
|
||||
}
|
||||
return true, false, false
|
||||
}
|
||||
if err := t.verifyToken(accessToken); err != nil {
|
||||
if idToken != "" {
|
||||
if err := t.verifyToken(idToken); err != nil {
|
||||
|
||||
@@ -65,7 +65,19 @@ type ProviderMetadata struct {
|
||||
// the complete authentication flow. It's designed to work seamlessly with Traefik's
|
||||
// plugin system and provides flexible configuration options.
|
||||
type TraefikOidc struct {
|
||||
lastMetadataRetryTime time.Time
|
||||
// lastMetadataRetryNano is the UnixNano timestamp of the last metadata
|
||||
// recovery attempt. Stored atomically so the hot ServeHTTP path can
|
||||
// throttle retries without acquiring metadataRetryMutex on every request.
|
||||
lastMetadataRetryNano int64
|
||||
// firstRequestStarted is 0 until the very first non-health request fires
|
||||
// the background-task bootstrap; then it flips to 1 via CAS. Replaces the
|
||||
// firstRequestMutex + firstRequestReceived combo which previously took
|
||||
// a write lock on every non-health request forever.
|
||||
firstRequestStarted int32
|
||||
// metadataRefreshStartedAtomic is the CAS-only variant of the old
|
||||
// metadataRefreshStarted bool. Both flags live under the same atomic so
|
||||
// concurrent first-request goroutines race exactly once.
|
||||
metadataRefreshStartedAtomic int32
|
||||
jwkCache JWKCacheInterface
|
||||
jwtVerifier JWTVerifier
|
||||
ctx context.Context
|
||||
@@ -130,17 +142,13 @@ type TraefikOidc struct {
|
||||
maxRefreshTokenAge time.Duration
|
||||
metadataMu sync.RWMutex
|
||||
shutdownOnce sync.Once
|
||||
metadataRetryMutex sync.Mutex
|
||||
firstRequestMutex sync.Mutex
|
||||
sessionInvalidationCache CacheInterface
|
||||
refreshResultCache CacheInterface
|
||||
minimalHeaders bool
|
||||
stripAuthCookies bool
|
||||
enableBackchannelLogout bool
|
||||
enableFrontchannelLogout bool
|
||||
firstRequestReceived bool
|
||||
requireTokenIntrospection bool
|
||||
metadataRefreshStarted bool
|
||||
allowPrivateIPAddresses bool
|
||||
disableReplayDetection bool
|
||||
allowOpaqueTokens bool
|
||||
@@ -149,4 +157,17 @@ type TraefikOidc struct {
|
||||
enablePKCE bool
|
||||
forceHTTPS bool
|
||||
suppressDiagnosticLogs bool
|
||||
|
||||
// Bearer-auth runtime state (populated only when EnableBearerAuth=true).
|
||||
bearerIdentifierClaim string
|
||||
bearerFailureTracker *bearerFailureTracker
|
||||
maxTokenAge time.Duration
|
||||
maxIdentifierLength int
|
||||
bearerFailureThreshold int
|
||||
bearerFailureWindow time.Duration
|
||||
bearerFailurePenalty time.Duration
|
||||
enableBearerAuth bool
|
||||
stripAuthorizationHeader bool
|
||||
bearerEmitWWWAuthenticate bool
|
||||
bearerOverridesCookie bool
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user