mirror of
https://github.com/lukaszraczylo/kubemirror.git
synced 2026-06-26 04:43:12 +00:00
fix: hash drift, transformer leak guard, prod logger, ctx-aware wait
M7: extractUnstructuredContent only hashed 'spec' when present, dropping all other top-level content fields. Resources with both spec and data (or any non-spec content) silently drifted until the next 10m resync. Now hashes every non-Kubernetes-managed top-level field, matching the fields updateUnstructuredMirror copies. M6: when a source has a transform annotation, also hash the source's labels and annotations (filtered of kubemirror.raczylo.com/* keys to avoid the controller's own bookkeeping churning the hash). Templates read these via TransformContext; without this a label change wouldn't re-render the transformed mirror. H3: text/template.Execute is not context-aware, so applyTemplateRule's timeout cancels the select but leaks the executor goroutine. Added a process-wide semaphore (cap 64) so a runaway template can't spawn an unbounded number of stuck goroutines on every reconcile. M4: zap dev mode (DPanic-on-error, console output, stacktraces on warning) was hardcoded on. Defaulted to production; --zap-devel flag remains for opt-in. M5: WaitForInitialDiscovery was anchored on context.Background() with its own WithTimeout, so SIGTERM during startup couldn't abort the wait. Now anchors on signalCtx.
This commit is contained in:
@@ -15,6 +15,18 @@ import (
|
||||
"github.com/lukaszraczylo/kubemirror/pkg/constants"
|
||||
)
|
||||
|
||||
// maxConcurrentTemplateExecutions is the process-wide ceiling on template
// executions that may be in flight at once.
//
// text/template.Execute takes no context, so when applyTemplateRule gives up
// after its timeout the goroutine running the template keeps going until the
// template finishes by itself. A pathological template (for example a
// {{ range }} that never terminates) would otherwise strand one more
// goroutine per reconcile; once this many slots are taken, applyTemplateRule
// returns an error immediately instead of starting another execution. The
// limit is deliberately generous: normal workloads should never get close.
//
// NOTE(review): the slot is handed back by a defer inside applyTemplateRule,
// i.e. when that call returns. Confirm the timeout path does not free the
// slot while the timed-out executor goroutine is still running; if it does,
// this cap limits concurrent calls rather than stranded goroutines.
const maxConcurrentTemplateExecutions = 64

// templateExecSemaphore is the counting semaphore behind the cap above: a
// buffered channel with one buffer slot per permitted execution. Sending a
// value takes a slot and receiving one gives it back.
var templateExecSemaphore = make(chan struct{}, maxConcurrentTemplateExecutions)
|
||||
|
||||
// Transformer applies transformation rules to Kubernetes resources.
|
||||
type Transformer struct {
|
||||
options TransformOptions
|
||||
@@ -168,6 +180,19 @@ func (t *Transformer) applyTemplateRule(u *unstructured.Unstructured, rule Rule,
|
||||
return fmt.Errorf("failed to parse template: %w", err)
|
||||
}
|
||||
|
||||
// Acquire a slot in the global template-execution semaphore. If saturated,
|
||||
// fail fast rather than spawning yet another goroutine that may leak when
|
||||
// it times out (text/template is not context-aware so timed-out goroutines
|
||||
// continue running until the template returns).
|
||||
select {
|
||||
case templateExecSemaphore <- struct{}{}:
|
||||
defer func() { <-templateExecSemaphore }()
|
||||
default:
|
||||
return fmt.Errorf("template execution rejected: %d concurrent executions in flight, "+
|
||||
"likely indicates one or more runaway templates leaking goroutines",
|
||||
maxConcurrentTemplateExecutions)
|
||||
}
|
||||
|
||||
// Execute template with timeout
|
||||
ctxWithTimeout, cancel := context.WithTimeout(context.Background(), t.options.TemplateTimeout)
|
||||
defer cancel()
|
||||
|
||||
@@ -734,6 +734,44 @@ func TestTransformer_TemplateTimeout(t *testing.T) {
|
||||
t.Skip("Template timeout testing is unreliable in unit tests - covered by integration tests")
|
||||
}
|
||||
|
||||
func TestTransformer_TemplateConcurrencyCap(t *testing.T) {
|
||||
// Regression (H3): text/template.Execute is not context-aware, so a
|
||||
// timed-out template execution leaves its goroutine running until the
|
||||
// template returns on its own. We bound that by a global semaphore;
|
||||
// when saturated, applyTemplateRule must fail fast instead of spawning
|
||||
// another goroutine.
|
||||
//
|
||||
// This test saturates the semaphore directly, then asserts the next
|
||||
// call returns the cap-exceeded error rather than blocking or panicking.
|
||||
for i := 0; i < maxConcurrentTemplateExecutions; i++ {
|
||||
templateExecSemaphore <- struct{}{}
|
||||
}
|
||||
defer func() {
|
||||
// Drain whatever the test left in the semaphore so subsequent tests
|
||||
// see a clean state.
|
||||
for {
|
||||
select {
|
||||
case <-templateExecSemaphore:
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
tmpl := "hello"
|
||||
tr := NewDefaultTransformer()
|
||||
rule := Rule{Path: "data.greeting", Template: &tmpl}
|
||||
u := &unstructured.Unstructured{Object: map[string]interface{}{
|
||||
"apiVersion": "v1",
|
||||
"kind": "ConfigMap",
|
||||
"data": map[string]interface{}{},
|
||||
}}
|
||||
|
||||
err := tr.applyTemplateRule(u, rule, TransformContext{})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "rejected", "saturated semaphore must reject new template executions")
|
||||
}
|
||||
|
||||
func TestMatchGlob(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
Reference in New Issue
Block a user