Files
kubemirror/pkg/circuitbreaker/circuitbreaker.go
T
lukaszraczylo 096dca47d1 improvements jan2025 (#6)
* feat(controller): add lazy watcher, improve resource usage and add pattern validation

- [x] Add cache sync health check for readiness probe verification
- [x] Create namespace lister with API reader support for fresh label queries
- [x] Add pattern validation with warning logs for invalid glob patterns
- [x] Implement lazy watcher initialization mode to scan for active resources
- [x] Add requeue delay to namespace reconciler for cache settlement
- [x] Replace custom containsString with slices.Contains from stdlib
- [x] Add structured logging context to reconcilers (kind, group, version)
- [x] Improve error variable naming for clarity in nested conditions
- [x] Add nil-safe label access in namespace reconciler setup
- [x] Add APIReader to namespace and source reconcilers for fresh data
- [x] Improve type assertions with proper error handling in mirror operations
- [x] Reorder struct fields for consistency and readability
- [x] Add comprehensive pattern validation tests and validation API

* feat(controller): add lazy watcher, improve resource usage and add pattern validation

- [x] Add circuit breaker for reconciliation failure tracking and prevention
- [x] Implement granular registration state tracking (not-registered, source-only, fully-registered)
- [x] Add lazy controller initialization for active resource types only
- [x] Consolidate namespace listing into single API call for efficiency
- [x] Add mirror creation verification to catch webhook rejections
- [x] Implement high-cardinality resource detection and warnings
- [x] Add source deletion check in mirror reconciler to prevent races
- [x] Preserve transformation annotations on errors in mirror reconciliation
- [x] Expand constants documentation with labels vs annotations design rationale
- [x] Add comprehensive test coverage for circuit breaker and registration states
- [x] Add mutation-safety tests for hash computation

* fixup! feat(controller): add lazy watcher, improve resource usage and add pattern validation
2026-01-14 13:07:11 +00:00

285 lines
7.3 KiB
Go

// Package circuitbreaker provides circuit breaker functionality for reconciliation failures.
// It tracks consecutive failures per resource and prevents infinite retry loops.
package circuitbreaker
import (
"sync"
"time"
)
// State is the phase a single resource's circuit is currently in.
type State int

// The states are numbered from zero, so the zero value of State is StateClosed.
const (
	// StateClosed is normal operation: the circuit lets requests through.
	StateClosed State = iota
	// StateOpen means the failure threshold was exceeded and the circuit has tripped.
	StateOpen
	// StateHalfOpen means the circuit is probing whether the resource can recover.
	StateHalfOpen
)

// String returns the lowercase name of the state ("closed", "open" or
// "half-open"). Any value outside the declared set yields "unknown".
func (s State) String() string {
	// Indexed by state value; anything outside the table is not a declared state.
	labels := [...]string{
		StateClosed:   "closed",
		StateOpen:     "open",
		StateHalfOpen: "half-open",
	}
	if s < 0 || int(s) >= len(labels) {
		return "unknown"
	}
	return labels[s]
}
// Config contains circuit breaker configuration.
// A single Config is shared by every resource tracked by one CircuitBreaker.
type Config struct {
	// FailureThreshold is the number of consecutive failures before opening the circuit.
	// A closed circuit opens once its consecutive failure count is >= this value.
	FailureThreshold int
	// ResetTimeout is how long to wait before attempting to close the circuit.
	// It is measured from the time of the most recently recorded failure.
	ResetTimeout time.Duration
	// HalfOpenSuccessThreshold is the number of consecutive successes in half-open state to close the circuit.
	// Any failure while half-open resets the success count and re-opens the circuit.
	HalfOpenSuccessThreshold int
}
// DefaultConfig returns the baseline settings: the circuit opens after 5
// consecutive failures, waits 5 minutes before probing again, and needs 2
// consecutive half-open successes to close.
func DefaultConfig() Config {
	var cfg Config
	cfg.FailureThreshold = 5
	cfg.ResetTimeout = 5 * time.Minute
	cfg.HalfOpenSuccessThreshold = 2
	return cfg
}
// resourceState tracks the state of a single resource.
// Every method in this file takes mu before reading or writing the other fields.
type resourceState struct {
	// lastFailure is when the most recent failure was recorded; the reset
	// timeout is measured from this instant.
	lastFailure time.Time
	// lastError is the error passed with the most recent failure; it is
	// cleared to nil when a success is recorded.
	lastError error
	// state is the stored circuit state. An open circuit whose reset timeout
	// has elapsed is still stored as StateOpen until a request or success
	// moves it to half-open.
	state State
	// consecutiveFailures counts failures since the last recorded success.
	consecutiveFailures int
	// consecutiveSuccesses counts successes while half-open; it is reset on any failure.
	consecutiveSuccesses int
	// mu guards all of the fields above.
	mu sync.RWMutex
}
// CircuitBreaker tracks failures per resource and provides circuit breaker functionality.
// Each resource has its own independent circuit, looked up by a key built
// from its namespace, name and kind.
type CircuitBreaker struct {
	// states maps a resource key (string) to its *resourceState.
	states sync.Map
	// config holds the thresholds and reset timeout applied to every tracked resource.
	config Config
}
// New builds an empty CircuitBreaker that applies config to every resource
// it goes on to track.
func New(config Config) *CircuitBreaker {
	cb := new(CircuitBreaker)
	cb.config = config
	return cb
}
// NewWithDefaults builds a CircuitBreaker configured with the values
// returned by DefaultConfig.
func NewWithDefaults() *CircuitBreaker {
	defaults := DefaultConfig()
	return New(defaults)
}
// resourceKey builds the lookup key for a resource in the form
// "<namespace>/<name>/<kind>". Empty components are kept as empty segments.
func resourceKey(namespace, name, kind string) string {
	const sep = "/"
	prefix := namespace + sep + name
	return prefix + sep + kind
}
// getOrCreateState returns the tracked state for key, creating a closed
// circuit for it on first use. Callers that pass the same key receive the
// same *resourceState for as long as the entry is not deleted.
func (cb *CircuitBreaker) getOrCreateState(key string) *resourceState {
	// Fast path: an existing entry needs no allocation. Calling LoadOrStore
	// unconditionally would build a throwaway resourceState on every lookup,
	// and this helper runs on every AllowRequest/Record* call.
	if existing, ok := cb.states.Load(key); ok {
		return existing.(*resourceState)
	}
	// Slow path: LoadOrStore keeps creation race-free. If another goroutine
	// stored an entry between the Load above and here, theirs is returned
	// and the freshly built value is simply discarded.
	actual, _ := cb.states.LoadOrStore(key, &resourceState{state: StateClosed})
	return actual.(*resourceState)
}
// AllowRequest reports whether work on the given resource should go ahead.
// It returns false only while the circuit is open and the reset timeout,
// measured from the last recorded failure, has not yet elapsed. Once that
// timeout has passed, the call itself moves the circuit from open to
// half-open and lets the request through as a recovery probe.
func (cb *CircuitBreaker) AllowRequest(namespace, name, kind string) bool {
	rs := cb.getOrCreateState(resourceKey(namespace, name, kind))

	rs.mu.Lock()
	defer rs.mu.Unlock()

	// Closed, half-open and any unrecognised state all let the request
	// through; only an open circuit can reject.
	if rs.state != StateOpen {
		return true
	}

	// Still inside the reset window: keep rejecting.
	if time.Since(rs.lastFailure) < cb.config.ResetTimeout {
		return false
	}

	// Reset window elapsed: start probing with a fresh success count.
	rs.state = StateHalfOpen
	rs.consecutiveSuccesses = 0
	return true
}
// RecordSuccess records a successful operation for the resource and returns
// the circuit state after recording.
//
// The consecutive failure count and the last error are always cleared.
// Depending on the stored state:
//   - closed: the circuit stays closed.
//   - half-open: the success counts toward HalfOpenSuccessThreshold; reaching
//     the threshold closes the circuit.
//   - open, reset timeout elapsed: the circuit moves to half-open and this
//     success is counted as its first probe, so it can close the circuit
//     immediately when the threshold is 1 or less.
//   - open, reset timeout not elapsed: the circuit stays open.
func (cb *CircuitBreaker) RecordSuccess(namespace, name, kind string) State {
	rs := cb.getOrCreateState(resourceKey(namespace, name, kind))

	rs.mu.Lock()
	defer rs.mu.Unlock()

	rs.consecutiveFailures = 0
	rs.lastError = nil

	// A success on an open circuit whose reset timeout has elapsed is the
	// first half-open probe. Promote the circuit here so the success goes
	// through the same threshold check as any other half-open success,
	// instead of being hard-coded to "one success, still half-open".
	if rs.state == StateOpen && time.Since(rs.lastFailure) >= cb.config.ResetTimeout {
		rs.state = StateHalfOpen
		rs.consecutiveSuccesses = 0
	}

	switch rs.state {
	case StateHalfOpen:
		rs.consecutiveSuccesses++
		if rs.consecutiveSuccesses >= cb.config.HalfOpenSuccessThreshold {
			rs.state = StateClosed
			rs.consecutiveSuccesses = 0
		}
	case StateClosed:
		// Already closed; the success counter is only meaningful while half-open.
		rs.consecutiveSuccesses = 0
	case StateOpen:
		// Reset timeout has not elapsed yet: the circuit stays open.
	}
	return rs.state
}
// RecordFailure notes a failed operation for the resource, remembering err
// and the current time as the latest failure. It returns the resulting
// circuit state and whether this particular call moved the circuit to open.
func (cb *CircuitBreaker) RecordFailure(namespace, name, kind string, err error) (State, bool) {
	rs := cb.getOrCreateState(resourceKey(namespace, name, kind))

	rs.mu.Lock()
	defer rs.mu.Unlock()

	rs.lastError = err
	rs.lastFailure = time.Now()
	rs.consecutiveSuccesses = 0
	rs.consecutiveFailures++

	var tripped bool
	switch {
	case rs.state == StateHalfOpen:
		// A single failed probe is enough to re-open the circuit.
		tripped = true
	case rs.state == StateClosed:
		tripped = rs.consecutiveFailures >= cb.config.FailureThreshold
	}
	// An already-open circuit only has its counters and timestamp refreshed.

	if tripped {
		rs.state = StateOpen
	}
	return rs.state, tripped
}
// GetState returns the current state for a resource without modifying it.
//
// A resource that has never been tracked is reported as StateClosed, and no
// entry is created for it, so querying unknown resources neither grows the
// state map nor affects GetStats. An open circuit whose reset timeout has
// elapsed is reported as StateHalfOpen even though the stored state is
// still open.
func (cb *CircuitBreaker) GetState(namespace, name, kind string) State {
	value, ok := cb.states.Load(resourceKey(namespace, name, kind))
	if !ok {
		// Untracked resources behave exactly like a fresh, closed circuit.
		return StateClosed
	}
	rs := value.(*resourceState)

	rs.mu.RLock()
	defer rs.mu.RUnlock()

	// Report the effective state: once the reset timeout has passed, the
	// next request would be let through as a half-open probe.
	if rs.state == StateOpen && time.Since(rs.lastFailure) >= cb.config.ResetTimeout {
		return StateHalfOpen
	}
	return rs.state
}
// GetFailureCount returns the consecutive failure count for a resource.
//
// A resource that has never been tracked reports 0, and no entry is created
// for it, so the lookup has no side effects on the state map.
func (cb *CircuitBreaker) GetFailureCount(namespace, name, kind string) int {
	value, ok := cb.states.Load(resourceKey(namespace, name, kind))
	if !ok {
		// Nothing recorded for this resource yet.
		return 0
	}
	rs := value.(*resourceState)

	rs.mu.RLock()
	defer rs.mu.RUnlock()
	return rs.consecutiveFailures
}
// GetLastError returns the last error recorded for a resource.
//
// It returns nil when the resource has never been tracked or when its most
// recent recorded outcome was a success. No entry is created for an unknown
// resource, so the lookup has no side effects on the state map.
func (cb *CircuitBreaker) GetLastError(namespace, name, kind string) error {
	value, ok := cb.states.Load(resourceKey(namespace, name, kind))
	if !ok {
		// Nothing recorded for this resource yet.
		return nil
	}
	rs := value.(*resourceState)

	rs.mu.RLock()
	defer rs.mu.RUnlock()
	return rs.lastError
}
// Reset forgets everything tracked for a resource by removing its entry;
// the next call that needs its state starts from a fresh closed circuit.
func (cb *CircuitBreaker) Reset(namespace, name, kind string) {
	cb.states.Delete(resourceKey(namespace, name, kind))
}
// OpenCircuits returns the keys ("<namespace>/<name>/<kind>") of resources
// whose circuit is effectively open, i.e. still rejecting requests.
//
// Circuits whose reset timeout has elapsed are excluded: GetState and
// GetStats report those as half-open, and AllowRequest would let the next
// request through, so listing them here would disagree with the rest of the
// API. The result is nil when no circuit is open, and its order is
// unspecified.
func (cb *CircuitBreaker) OpenCircuits() []string {
	var open []string
	cb.states.Range(func(key, value any) bool {
		rs := value.(*resourceState)

		rs.mu.RLock()
		// Same effective-state rule as GetState/GetStats: an open circuit
		// past its reset timeout counts as half-open, not open.
		isOpen := rs.state == StateOpen &&
			time.Since(rs.lastFailure) < cb.config.ResetTimeout
		rs.mu.RUnlock()

		if isOpen {
			open = append(open, key.(string))
		}
		return true
	})
	return open
}
// Stats contains aggregate statistics.
// It is a point-in-time count of tracked resources grouped by circuit state,
// as produced by GetStats.
type Stats struct {
	// Total is the number of resources that currently have a tracked entry.
	Total int
	// Closed is the number of circuits in the closed state.
	Closed int
	// Open is the number of open circuits whose reset timeout has not yet elapsed.
	Open int
	// HalfOpen is the number of half-open circuits, including open circuits
	// whose reset timeout has already elapsed.
	HalfOpen int
}
// GetStats walks every tracked resource and tallies how many circuits are
// closed, open and half-open. An open circuit whose reset timeout has
// already elapsed is tallied as half-open.
func (cb *CircuitBreaker) GetStats() Stats {
	var totals Stats
	cb.states.Range(func(_, entry any) bool {
		rs := entry.(*resourceState)

		// Hold the read lock only long enough to take the effective state.
		rs.mu.RLock()
		effective := rs.state
		if effective == StateOpen && time.Since(rs.lastFailure) >= cb.config.ResetTimeout {
			effective = StateHalfOpen
		}
		rs.mu.RUnlock()

		totals.Total++
		switch {
		case effective == StateClosed:
			totals.Closed++
		case effective == StateOpen:
			totals.Open++
		case effective == StateHalfOpen:
			totals.HalfOpen++
		}
		return true
	})
	return totals
}