From dfe08b35d1a8300c4e5ed4f6d9c3800f24e22ad5 Mon Sep 17 00:00:00 2001 From: Lukasz Raczylo Date: Sat, 2 May 2026 22:39:09 +0100 Subject: [PATCH] fix(controller): stop self-triggered reconcile loops C2: updateLastSyncStatus wrote the sync-status annotation on every successful reconcile. Because the source's watch predicate is the 'enabled' label (server-side filter), that Update fires a watch event that re-enters Reconcile. With reconciled/error counts varying across cycles, the value differs each time, so the API server bumps RV and the loop never quiesces. Now skips the Update when the value matches the existing annotation. C3: NamespaceReconciler's happy-path returned RequeueAfter=3s unconditionally. Every namespace in the cluster re-reconciled every 3 seconds forever, generating constant List calls per source kind. Now returns ctrl.Result{}; cache-staleness windows are handled by the manager's resync period and source freshness verification. --- pkg/controller/namespace_reconciler.go | 22 ++++----- pkg/controller/namespace_reconciler_test.go | 7 ++- pkg/controller/source_reconciler.go | 16 +++++-- pkg/controller/source_reconciler_test.go | 52 +++++++++++++++++++++ 4 files changed, 79 insertions(+), 18 deletions(-) diff --git a/pkg/controller/namespace_reconciler.go b/pkg/controller/namespace_reconciler.go index bf5adc2..ce4acee 100644 --- a/pkg/controller/namespace_reconciler.go +++ b/pkg/controller/namespace_reconciler.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "slices" - "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -22,13 +21,6 @@ import ( "github.com/lukaszraczylo/kubemirror/pkg/filter" ) -const ( - // cacheSettleDelay is the time to wait after namespace label changes - // to allow informer caches to sync. This addresses the race condition - // where namespace watch events fire before the cache is updated. - cacheSettleDelay = 3 * time.Second -) - // NamespaceReconciler watches for namespace CREATE and UPDATE events // and triggers reconciliation of source resources that match the new namespace. type NamespaceReconciler struct { @@ -88,11 +80,15 @@ func (r *NamespaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, fmt.Errorf("failed to reconcile %d source resources", totalErrors) } - // Requeue with delay to catch any updates missed due to cache staleness. - // This is particularly important for namespace label changes where the - // informer cache may not yet reflect the new label state. The delay allows - // the cache to settle and ensures all relevant source resources are reconciled. - return ctrl.Result{RequeueAfter: cacheSettleDelay}, nil + // Don't requeue. The previous unconditional RequeueAfter caused every + // namespace in the cluster to re-reconcile every 3 seconds forever, + // generating constant API-server pressure scaled by namespace count. + // Cache-staleness windows after label changes are handled by: + // - the manager's resync period (default 10m), which re-fires events, + // - source freshness verification (--verify-source-freshness, default on) + // in the SourceReconciler path, + // - and the next genuine namespace event. + return ctrl.Result{}, nil } // reconcileResourceType finds and reconciles all sources of a specific resource type diff --git a/pkg/controller/namespace_reconciler_test.go b/pkg/controller/namespace_reconciler_test.go index db7c7d6..667ba06 100644 --- a/pkg/controller/namespace_reconciler_test.go +++ b/pkg/controller/namespace_reconciler_test.go @@ -203,8 +203,13 @@ func TestNamespaceReconciler_CleanupWhenNamespaceNoLongerTarget(t *testing.T) { Name: tt.namespace.Name, }, } - _, err := reconciler.Reconcile(ctx, req) + result, err := reconciler.Reconcile(ctx, req) require.NoError(t, err) + // Regression: never schedule an unconditional re-reconcile. The + // previous implementation returned RequeueAfter=3s for every + // namespace event, which scaled to one re-reconcile per namespace + // every 3 seconds forever. + assert.Zero(t, result.RequeueAfter, "happy-path Reconcile must not schedule a periodic requeue") // Verify mirrors were deleted as expected for _, mirrorName := range tt.expectedDeleted { diff --git a/pkg/controller/source_reconciler.go b/pkg/controller/source_reconciler.go index ead83c1..3fce3c3 100644 --- a/pkg/controller/source_reconciler.go +++ b/pkg/controller/source_reconciler.go @@ -627,16 +627,24 @@ func (r *SourceReconciler) resolveTargetNamespaces(ctx context.Context, sourceOb return targetNamespaces, nil } -// updateLastSyncStatus updates the source resource's annotations with sync status. +// updateLastSyncStatus updates the source resource's sync-status annotation. +// Skips the Update call entirely if the value is unchanged; otherwise the +// resulting watch event re-fires Reconcile and the controller spins on its +// own writes. This is a no-op for steady-state convergence. func (r *SourceReconciler) updateLastSyncStatus(ctx context.Context, source runtime.Object, sourceObj metav1.Object, reconciledCount, errorCount int) error { + desired := fmt.Sprintf("reconciled:%d,errors:%d", reconciledCount, errorCount) + annotations := sourceObj.GetAnnotations() + if annotations[constants.AnnotationSyncStatus] == desired { + return nil + } + if annotations == nil { annotations = make(map[string]string) } - - annotations[constants.AnnotationSyncStatus] = fmt.Sprintf("reconciled:%d,errors:%d", reconciledCount, errorCount) - + annotations[constants.AnnotationSyncStatus] = desired sourceObj.SetAnnotations(annotations) + // source (*unstructured.Unstructured) already implements client.Object return r.Update(ctx, source.(*unstructured.Unstructured)) } diff --git a/pkg/controller/source_reconciler_test.go b/pkg/controller/source_reconciler_test.go index 230ec2d..71783a7 100644 --- a/pkg/controller/source_reconciler_test.go +++ b/pkg/controller/source_reconciler_test.go @@ -1103,3 +1103,55 @@ func TestSourceReconciler_Reconcile_RefusesBlacklistedSecret(t *testing.T) { mockLister.AssertNotCalled(t, "ListNamespacesWithLabels", mock.Anything) mockClient.AssertNotCalled(t, "Create", mock.Anything, mock.Anything, mock.Anything) } +func TestSourceReconciler_updateLastSyncStatus_skipsWhenUnchanged(t *testing.T) { + // Regression test: re-running with the same reconciled/error counts must + // NOT issue an Update call — otherwise every successful reconcile bumps + // resourceVersion, fires a watch event, and re-enters Reconcile in a loop. + mockClient := new(MockClient) + + source := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]interface{}{ + "name": "test", + "namespace": "default", + "annotations": map[string]interface{}{ + constants.AnnotationSyncStatus: "reconciled:3,errors:0", + }, + }, + }, + } + + r := &SourceReconciler{Client: mockClient} + err := r.updateLastSyncStatus(context.Background(), source, source, 3, 0) + require.NoError(t, err) + + mockClient.AssertNotCalled(t, "Update", mock.Anything, mock.Anything, mock.Anything) +} + +func TestSourceReconciler_updateLastSyncStatus_writesWhenChanged(t *testing.T) { + mockClient := new(MockClient) + mockClient.On("Update", mock.Anything, mock.AnythingOfType("*unstructured.Unstructured"), mock.Anything).Return(nil) + + source := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]interface{}{ + "name": "test", + "namespace": "default", + "annotations": map[string]interface{}{ + constants.AnnotationSyncStatus: "reconciled:2,errors:0", + }, + }, + }, + } + + r := &SourceReconciler{Client: mockClient} + err := r.updateLastSyncStatus(context.Background(), source, source, 3, 0) + require.NoError(t, err) + + assert.Equal(t, "reconciled:3,errors:0", source.GetAnnotations()[constants.AnnotationSyncStatus]) + mockClient.AssertCalled(t, "Update", mock.Anything, mock.Anything, mock.Anything) +}