From f821b8829b32772337436a85a230d369dc81e489 Mon Sep 17 00:00:00 2001 From: Lukasz Raczylo Date: Mon, 25 May 2026 00:06:47 +0100 Subject: [PATCH] fix: remove write-lock convoy in getLocal + fix mutateState CAS bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UniversalCache.getLocal(): when a cached token expires, the RLock fast path (lines 385-398) previously fell through to c.mu.Lock() (write lock). Under Yaegi, the write-lock holder takes 10-100ms for LRU manipulation, and Go's RWMutex writer-priority semantics block ALL new RLock callers. A single expired-token event turned every concurrent request from read-parallel into write-serialized — the convoy that produced the 737-goroutine pileup at 0x400275a608 (pprof captured at /tmp/traefik-spike-1779663149). Fix: return (nil, false) immediately on expiry for Token/JWK/Session cache types. The periodic cleanup goroutine handles eviction. The write lock is never taken on the read path for these cache types. refreshAttemptTracker.mutateState(): the CAS loop used t.state.CompareAndSwap(t.state.Load(), next) — a second Load that can see a different value from a concurrent writer, silently overwriting their update. Fixed to CompareAndSwap(cur, next) using the snapshot we computed the mutation from. 
--- refresh_coordinator.go | 2 +- universal_cache.go | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/refresh_coordinator.go b/refresh_coordinator.go index 236d96b..254e690 100644 --- a/refresh_coordinator.go +++ b/refresh_coordinator.go @@ -498,7 +498,7 @@ func (t *refreshAttemptTracker) mutateState(mutate func(cur *attemptState) *atte if next == nil { return cur } - if t.state.CompareAndSwap(t.state.Load(), next) { + if t.state.CompareAndSwap(cur, next) { return next } } diff --git a/universal_cache.go b/universal_cache.go index aa4adad..827aaf8 100644 --- a/universal_cache.go +++ b/universal_cache.go @@ -396,8 +396,16 @@ func (c *UniversalCache) getLocal(key string) (interface{}, bool) { return value, true } c.mu.RUnlock() - // Expired — fall through to the write-locked slow path below to - // remove the entry under exclusive access. + // Expired — return miss immediately. The periodic cleanup goroutine + // will evict the stale entry. NEVER fall through to the write-locked + // slow path for Token/JWK/Session caches: under Yaegi the write Lock + // at line 403 costs 10-100ms per acquisition, and Go's RWMutex + // writer-priority semantics block ALL new RLock callers while a Lock + // is pending. A single expired-token event turns every concurrent + // request from read-parallel into write-serialized — the exact + // convoy that produced the 737-goroutine pileup at 0x400275a608. + atomic.AddInt64(&c.misses, 1) + return nil, false } c.mu.Lock()