fix: hash drift, transformer leak guard, prod logger, ctx-aware wait

M7: extractUnstructuredContent only hashed 'spec' when present, dropping
all other top-level content fields. Resources with both spec and data
(or any non-spec content) silently drifted until the next 10m resync.
Now hashes every non-Kubernetes-managed top-level field, matching the
fields updateUnstructuredMirror copies.

M6: when a source has a transform annotation, also hash the source's
labels and annotations (with kubemirror.raczylo.com/* keys filtered out
so the controller's own bookkeeping doesn't churn the hash). Templates
read these via TransformContext; without this, a label change wouldn't
re-render the transformed mirror.

H3: text/template.Execute is not context-aware, so when applyTemplateRule's
timeout fires the select returns but the executor goroutine keeps running
(it leaks). Added a process-wide semaphore (cap 64) so a runaway template
can't spawn an unbounded number of stuck goroutines on every reconcile.

M4: zap dev mode (DPanic-on-error, console output, stacktraces on
warning) was hardcoded on. Defaulted to production; --zap-devel flag
remains for opt-in.

M5: WaitForInitialDiscovery was anchored on context.Background() with
its own WithTimeout, so SIGTERM during startup couldn't abort the wait.
Now anchors on signalCtx.
This commit is contained in:
2026-05-02 22:49:15 +01:00
parent cf095e93f4
commit 75f7c18f3c
5 changed files with 188 additions and 30 deletions
+25
View File
@@ -15,6 +15,18 @@ import (
"github.com/lukaszraczylo/kubemirror/pkg/constants"
)
// maxConcurrentTemplateExecutions is the process-wide limit on template
// executions that may be in flight at the same time.
//
// Why a limit exists: text/template.Execute takes no context, so a timeout
// in applyTemplateRule cannot stop the goroutine running the template; that
// goroutine keeps going until the template finishes by itself. A template
// that never finishes (for example an endless {{ range }}) would otherwise
// be free to pile up on every call. When all slots are taken,
// applyTemplateRule returns an error immediately instead of starting more
// work. The value is deliberately large; ordinary workloads are not expected
// to get anywhere near it.
//
// NOTE(review): applyTemplateRule gives its slot back from a defer, i.e. when
// the call itself returns. If the call returns on timeout while the executor
// goroutine is still running, that goroutine no longer holds a slot, so the
// limit would cap concurrent calls rather than leaked goroutines. Confirm,
// and consider releasing the slot from the executor goroutine instead.
const maxConcurrentTemplateExecutions = 64

// templateExecSemaphore is the counting semaphore that enforces
// maxConcurrentTemplateExecutions. It is a buffered channel with one buffer
// position per slot: a send takes a slot, a receive gives it back.
var templateExecSemaphore = make(chan struct{}, maxConcurrentTemplateExecutions)
// Transformer applies transformation rules to Kubernetes resources.
type Transformer struct {
options TransformOptions
@@ -168,6 +180,19 @@ func (t *Transformer) applyTemplateRule(u *unstructured.Unstructured, rule Rule,
return fmt.Errorf("failed to parse template: %w", err)
}
// Acquire a slot in the global template-execution semaphore. If saturated,
// fail fast rather than spawning yet another goroutine that may leak when
// it times out (text/template is not context-aware so timed-out goroutines
// continue running until the template returns).
select {
case templateExecSemaphore <- struct{}{}:
defer func() { <-templateExecSemaphore }()
default:
return fmt.Errorf("template execution rejected: %d concurrent executions in flight, "+
"likely indicates one or more runaway templates leaking goroutines",
maxConcurrentTemplateExecutions)
}
// Execute template with timeout
ctxWithTimeout, cancel := context.WithTimeout(context.Background(), t.options.TemplateTimeout)
defer cancel()
+38
View File
@@ -734,6 +734,44 @@ func TestTransformer_TemplateTimeout(t *testing.T) {
t.Skip("Template timeout testing is unreliable in unit tests - covered by integration tests")
}
// TestTransformer_TemplateConcurrencyCap is the regression guard for H3.
//
// text/template.Execute takes no context, so a template execution that times
// out leaves its goroutine running until the template finishes by itself. A
// global semaphore limits how many executions may be in flight; with every
// slot taken, applyTemplateRule has to return an error immediately rather
// than start another goroutine.
func TestTransformer_TemplateConcurrencyCap(t *testing.T) {
	// Occupy every slot by hand so the call under test finds the semaphore
	// full. The call must then come back with the rejection error; it must
	// neither block nor panic.
	for held := 0; held < maxConcurrentTemplateExecutions; held++ {
		templateExecSemaphore <- struct{}{}
	}

	// On exit, empty the semaphore completely so that tests running after
	// this one start from a clean state. The receive is non-blocking, so the
	// loop stops as soon as nothing is left.
	defer func() {
		for drained := false; !drained; {
			select {
			case <-templateExecSemaphore:
			default:
				drained = true
			}
		}
	}()

	greeting := "hello"
	transformer := NewDefaultTransformer()
	obj := &unstructured.Unstructured{
		Object: map[string]interface{}{
			"apiVersion": "v1",
			"kind":       "ConfigMap",
			"data":       map[string]interface{}{},
		},
	}
	templateRule := Rule{Path: "data.greeting", Template: &greeting}

	err := transformer.applyTemplateRule(obj, templateRule, TransformContext{})
	require.Error(t, err)
	assert.Contains(t, err.Error(), "rejected", "saturated semaphore must reject new template executions")
}
func TestMatchGlob(t *testing.T) {
tests := []struct {
name string