improvements jan2025 (#6)

* feat(controller): add lazy watcher, improve resource usage and add pattern validation

- [x] Add cache sync health check for readiness probe verification
- [x] Create namespace lister with API reader support for fresh label queries
- [x] Add pattern validation with warning logs for invalid glob patterns
- [x] Implement lazy watcher initialization mode to scan for active resources
- [x] Add requeue delay to namespace reconciler for cache settlement
- [x] Replace custom containsString with slices.Contains from stdlib
- [x] Add structured logging context to reconcilers (kind, group, version)
- [x] Improve error variable naming for clarity in nested conditions
- [x] Add nil-safe label access in namespace reconciler setup
- [x] Add APIReader to namespace and source reconcilers for fresh data
- [x] Improve type assertions with proper error handling in mirror operations
- [x] Reorder struct fields for consistency and readability
- [x] Add comprehensive pattern validation tests and validation API

* feat(controller): add lazy watcher, improve resource usage and add pattern validation

- [x] Add circuit breaker for reconciliation failure tracking and prevention
- [x] Implement granular registration state tracking (not-registered, source-only, fully-registered)
- [x] Add lazy controller initialization for active resource types only
- [x] Consolidate namespace listing into single API call for efficiency
- [x] Add mirror creation verification to catch webhook rejections
- [x] Implement high-cardinality resource detection and warnings
- [x] Add source deletion check in mirror reconciler to prevent races
- [x] Preserve transformation annotations on errors in mirror reconciliation
- [x] Expand constants documentation with labels vs annotations design rationale
- [x] Add comprehensive test coverage for circuit breaker and registration states
- [x] Add mutation-safety tests for hash computation

* fixup! feat(controller): add lazy watcher, improve resource usage and add pattern validation
This commit is contained in:
2026-01-14 13:07:11 +00:00
committed by GitHub
parent 4f8e2783cf
commit 096dca47d1
22 changed files with 1937 additions and 266 deletions
+81 -7
View File
@@ -3,10 +3,13 @@ package main
import (
"context"
"errors"
"flag"
"net/http"
"os"
"time"
"github.com/go-logr/logr"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -18,6 +21,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"github.com/lukaszraczylo/kubemirror/pkg/circuitbreaker"
"github.com/lukaszraczylo/kubemirror/pkg/config"
"github.com/lukaszraczylo/kubemirror/pkg/constants"
"github.com/lukaszraczylo/kubemirror/pkg/controller"
@@ -34,6 +38,24 @@ func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
}
// makeCacheSyncChecker creates a healthz.Checker that verifies informer cache sync.
// This ensures the readiness probe fails if caches are not synced.
func makeCacheSyncChecker(c cache.Cache, ctx context.Context, logger logr.Logger) healthz.Checker {
return func(_ *http.Request) error {
// WaitForCacheSync returns true immediately if already synced,
// or waits until sync completes or context is cancelled.
// With a short context timeout, this provides a quick check.
checkCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
defer cancel()
if !c.WaitForCacheSync(checkCtx) {
logger.V(1).Info("informer caches not yet synced")
return errors.New("informer caches not synced")
}
return nil
}
}
func main() {
var (
metricsAddr string
@@ -143,6 +165,14 @@ func main() {
"included", includedList,
)
// Create circuit breaker for reconciliation failures
cb := circuitbreaker.NewWithDefaults()
setupLog.Info("circuit breaker initialized",
"failureThreshold", 5,
"resetTimeout", "5m",
"halfOpenSuccessThreshold", 2,
)
// Parse and configure resource types
var mirroredResources []config.ResourceType
if resourceTypes != "" {
@@ -212,13 +242,29 @@ func main() {
os.Exit(1)
}
// Note on Field Indexes:
// Field indexes in controller-runtime can improve performance for in-cache lookups.
// For kubemirror, potential indexes include:
// 1. metadata.labels[kubemirror.raczylo.com/enabled] - for finding enabled resources
// 2. annotations[kubemirror.raczylo.com/source-uid] - for finding mirrors by source
//
// However, these are not implemented because:
// - Server-side filtering via label selectors already handles enabled label filtering efficiently
// - Mirror-to-source lookups are currently done by listing all managed resources
// - Dynamic resource types (unstructured) make index setup more complex
// - Benchmark testing is required to verify indexes improve performance before adding complexity
//
// If benchmarks show indexes would help, use:
// mgr.GetFieldIndexer().IndexField(ctx, &unstructured.Unstructured{...}, indexPath, extractFunc)
// Set up signal handler context for graceful shutdown
signalCtx := ctrl.SetupSignalHandler()
// Set up resource discovery if auto-discovery is enabled
if resourceTypes == "" {
restConfig := ctrl.GetConfigOrDie()
discoveryClient, err := discovery.NewResourceDiscovery(restConfig)
var discoveryClient *discovery.ResourceDiscovery
discoveryClient, err = discovery.NewResourceDiscovery(restConfig)
if err != nil {
setupLog.Error(err, "unable to create discovery client")
os.Exit(1)
@@ -227,7 +273,8 @@ func main() {
discoveryMgr := discovery.NewManager(discoveryClient, discoveryInterval)
// Start discovery manager with signal-aware context
if err := discoveryMgr.Start(signalCtx); err != nil {
err = discoveryMgr.Start(signalCtx)
if err != nil {
setupLog.Error(err, "unable to start discovery manager")
os.Exit(1)
}
@@ -235,7 +282,8 @@ func main() {
// Wait for initial discovery with 30s timeout
waitCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
if err := discoveryMgr.WaitForInitialDiscovery(waitCtx, 30*time.Second); err != nil {
err = discoveryMgr.WaitForInitialDiscovery(waitCtx, 30*time.Second)
if err != nil {
setupLog.Error(err, "timeout waiting for initial resource discovery")
os.Exit(1)
}
@@ -250,8 +298,21 @@ func main() {
)
}
// Create namespace lister
namespaceLister := controller.NewKubernetesNamespaceLister(mgr.GetClient())
// Create namespace lister with API reader for fresh namespace lookups.
// This ensures label-based queries (allow-mirrors label) return fresh data
// and don't suffer from informer cache staleness after label changes.
namespaceLister := controller.NewKubernetesNamespaceListerWithAPIReader(
mgr.GetClient(),
mgr.GetAPIReader(),
)
// Validate flag combinations and warn about conflicts
if lazyWatcherInit && resourceTypes != "" {
setupLog.Info("WARNING: --resource-types flag is ignored in lazy-watcher-init mode",
"specifiedTypes", resourceTypes,
"reason", "lazy watcher discovers resource types dynamically based on actual usage",
)
}
// Choose between lazy watcher initialization (scan for active resources) or eager (register all)
if lazyWatcherInit {
@@ -270,6 +331,7 @@ func main() {
NamespaceLister: namespaceLister,
GVK: gvk,
APIReader: mgr.GetAPIReader(),
CircuitBreaker: cb,
}
}
@@ -284,6 +346,7 @@ func main() {
// Create dynamic controller manager
dynamicMgr := controller.NewDynamicControllerManager(controller.DynamicManagerConfig{
Client: mgr.GetClient(),
APIReader: mgr.GetAPIReader(), // Direct API reader for pre-start scans
Manager: mgr,
Config: cfg,
Filter: namespaceFilter,
@@ -295,7 +358,8 @@ func main() {
})
// Start dynamic controller manager
if err := dynamicMgr.Start(signalCtx); err != nil {
err = dynamicMgr.Start(signalCtx)
if err != nil {
setupLog.Error(err, "unable to start dynamic controller manager")
os.Exit(1)
}
@@ -325,6 +389,7 @@ func main() {
NamespaceLister: namespaceLister,
GVK: gvk,
APIReader: mgr.GetAPIReader(), // Direct API reader (bypasses cache)
CircuitBreaker: cb,
}
if err = sourceReconciler.SetupWithManagerForResourceType(mgr, gvk); err != nil {
@@ -361,6 +426,7 @@ func main() {
Filter: namespaceFilter,
NamespaceLister: namespaceLister,
ResourceTypes: cfg.MirroredResourceTypes,
APIReader: mgr.GetAPIReader(), // Direct API reader for fresh namespace lookups
}
if err = namespaceReconciler.SetupWithManager(mgr); err != nil {
@@ -371,11 +437,19 @@ func main() {
setupLog.Info("registered namespace reconciler")
// Add health checks
// Liveness: basic ping to verify the controller process is alive
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
// Readiness: check that informer caches are synced before accepting traffic.
// This prevents reconciliation from running with incomplete/stale cache data.
// The cache sync check ensures all informers have received initial data from the API server.
// Note: The manager automatically waits for cache sync before starting controllers,
// but this check ensures the readiness probe reflects cache state for external monitoring.
cacheReadyCheck := makeCacheSyncChecker(mgr.GetCache(), signalCtx, setupLog)
if err := mgr.AddReadyzCheck("readyz", cacheReadyCheck); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}