fix(controller): stop self-triggered reconcile loops

C2: updateLastSyncStatus wrote the sync-status annotation on every
successful reconcile. Because the source's watch predicate is the
'enabled' label (server-side filter), that Update fires a watch event
that re-enters Reconcile. With reconciled/error counts varying across
cycles, the value differs each time, so the API server bumps RV and
the loop never quiesces. Now skips the Update when the value matches
the existing annotation.

C3: NamespaceReconciler's happy-path returned RequeueAfter=3s
unconditionally. Every namespace in the cluster re-reconciled every
3 seconds forever, generating constant List calls per source kind.
Now returns ctrl.Result{}; cache-staleness windows are handled by
the manager's resync period and source freshness verification.
This commit is contained in:
2026-05-02 22:39:09 +01:00
parent 99c0eccd53
commit dfe08b35d1
4 changed files with 79 additions and 18 deletions
+9 -13
View File
@@ -5,7 +5,6 @@ import (
"context"
"fmt"
"slices"
"time"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
@@ -22,13 +21,6 @@ import (
"github.com/lukaszraczylo/kubemirror/pkg/filter"
)
const (
// cacheSettleDelay is the time to wait after namespace label changes
// to allow informer caches to sync. This addresses the race condition
// where namespace watch events fire before the cache is updated.
cacheSettleDelay = 3 * time.Second
)
// NamespaceReconciler watches for namespace CREATE and UPDATE events
// and triggers reconciliation of source resources that match the new namespace.
type NamespaceReconciler struct {
@@ -88,11 +80,15 @@ func (r *NamespaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, fmt.Errorf("failed to reconcile %d source resources", totalErrors)
}
// Requeue with delay to catch any updates missed due to cache staleness.
// This is particularly important for namespace label changes where the
// informer cache may not yet reflect the new label state. The delay allows
// the cache to settle and ensures all relevant source resources are reconciled.
return ctrl.Result{RequeueAfter: cacheSettleDelay}, nil
// Don't requeue. The previous unconditional RequeueAfter caused every
// namespace in the cluster to re-reconcile every 3 seconds forever,
// generating constant API-server pressure scaled by namespace count.
// Cache-staleness windows after label changes are handled by:
// - the manager's resync period (default 10m), which re-fires events,
// - source freshness verification (--verify-source-freshness, default on)
// in the SourceReconciler path,
// - and the next genuine namespace event.
return ctrl.Result{}, nil
}
// reconcileResourceType finds and reconciles all sources of a specific resource type
+6 -1
View File
@@ -203,8 +203,13 @@ func TestNamespaceReconciler_CleanupWhenNamespaceNoLongerTarget(t *testing.T) {
Name: tt.namespace.Name,
},
}
_, err := reconciler.Reconcile(ctx, req)
result, err := reconciler.Reconcile(ctx, req)
require.NoError(t, err)
// Regression: never schedule an unconditional re-reconcile. The
// previous implementation returned RequeueAfter=3s for every
// namespace event, which scaled to one re-reconcile per namespace
// every 3 seconds forever.
assert.Zero(t, result.RequeueAfter, "happy-path Reconcile must not schedule a periodic requeue")
// Verify mirrors were deleted as expected
for _, mirrorName := range tt.expectedDeleted {
+12 -4
View File
@@ -627,16 +627,24 @@ func (r *SourceReconciler) resolveTargetNamespaces(ctx context.Context, sourceOb
return targetNamespaces, nil
}
// updateLastSyncStatus updates the source resource's annotations with sync status.
// updateLastSyncStatus updates the source resource's sync-status annotation.
// Skips the Update call entirely if the value is unchanged; otherwise the
// resulting watch event re-fires Reconcile and the controller spins on its
// own writes. This is a no-op for steady-state convergence.
func (r *SourceReconciler) updateLastSyncStatus(ctx context.Context, source runtime.Object, sourceObj metav1.Object, reconciledCount, errorCount int) error {
desired := fmt.Sprintf("reconciled:%d,errors:%d", reconciledCount, errorCount)
annotations := sourceObj.GetAnnotations()
if annotations[constants.AnnotationSyncStatus] == desired {
return nil
}
if annotations == nil {
annotations = make(map[string]string)
}
annotations[constants.AnnotationSyncStatus] = fmt.Sprintf("reconciled:%d,errors:%d", reconciledCount, errorCount)
annotations[constants.AnnotationSyncStatus] = desired
sourceObj.SetAnnotations(annotations)
// source (*unstructured.Unstructured) already implements client.Object
return r.Update(ctx, source.(*unstructured.Unstructured))
}
+52
View File
@@ -1103,3 +1103,55 @@ func TestSourceReconciler_Reconcile_RefusesBlacklistedSecret(t *testing.T) {
mockLister.AssertNotCalled(t, "ListNamespacesWithLabels", mock.Anything)
mockClient.AssertNotCalled(t, "Create", mock.Anything, mock.Anything, mock.Anything)
}
func TestSourceReconciler_updateLastSyncStatus_skipsWhenUnchanged(t *testing.T) {
// Regression test: re-running with the same reconciled/error counts must
// NOT issue an Update call — otherwise every successful reconcile bumps
// resourceVersion, fires a watch event, and re-enters Reconcile in a loop.
mockClient := new(MockClient)
source := &unstructured.Unstructured{
Object: map[string]interface{}{
"apiVersion": "v1",
"kind": "Secret",
"metadata": map[string]interface{}{
"name": "test",
"namespace": "default",
"annotations": map[string]interface{}{
constants.AnnotationSyncStatus: "reconciled:3,errors:0",
},
},
},
}
r := &SourceReconciler{Client: mockClient}
err := r.updateLastSyncStatus(context.Background(), source, source, 3, 0)
require.NoError(t, err)
mockClient.AssertNotCalled(t, "Update", mock.Anything, mock.Anything, mock.Anything)
}
func TestSourceReconciler_updateLastSyncStatus_writesWhenChanged(t *testing.T) {
mockClient := new(MockClient)
mockClient.On("Update", mock.Anything, mock.AnythingOfType("*unstructured.Unstructured"), mock.Anything).Return(nil)
source := &unstructured.Unstructured{
Object: map[string]interface{}{
"apiVersion": "v1",
"kind": "Secret",
"metadata": map[string]interface{}{
"name": "test",
"namespace": "default",
"annotations": map[string]interface{}{
constants.AnnotationSyncStatus: "reconciled:2,errors:0",
},
},
},
}
r := &SourceReconciler{Client: mockClient}
err := r.updateLastSyncStatus(context.Background(), source, source, 3, 0)
require.NoError(t, err)
assert.Equal(t, "reconciled:3,errors:0", source.GetAnnotations()[constants.AnnotationSyncStatus])
mockClient.AssertCalled(t, "Update", mock.Anything, mock.Anything, mock.Anything)
}