Files
jobs-manager-operator/controllers/metrics_test.go
T
lukaszraczylo 2b36071647 Multiple fixes (#29)
* Multiple fixes

- add goreleaser to the build / release process
- add kubectl plugin for job graphs visualization
- add installation scripts
- update dependencies

* Update the release & CRD content.

* Next set of improvements.

  Code Quality

  - Label constants: Added LabelWorkflowName, LabelGroupName, LabelJobName, LabelJobID in controllers/definitions.go
  - Removed commented debug code: Cleaned up dead code from multiple files
  - Removed unused dependencyTree field: Cleaned connPackage struct
  - Fixed snake_case variables: Changed to camelCase (runGroup, groupDep, runJob, jobDep, k8sJob)

  Kubernetes Best Practices

  - Finalizers: Implemented handleDeletion() and deleteChildJobs() for proper cleanup
  - Status enum validation: Added +kubebuilder:validation:Enum=pending;running;succeeded;failed;aborted
  - ImagePullPolicy default: Created getImagePullPolicy() helper that defaults to IfNotPresent
  - Resource limits support: Added Resources *corev1.ResourceRequirements to ManagedJobParameters

  Observability

  - Prometheus metrics: Created controllers/metrics.go with counters (jobs created/succeeded/failed), histogram (reconciliation duration), and gauge (active jobs)
  - Structured logging: Added logger field to connPackage, used context-based logging throughout

  Configuration

  - Leader election ID: Made configurable via --leader-election-id flag
  - Development mode: Made configurable via --dev-mode flag and LOG_LEVEL env var

  Performance

  - Dependency lookup optimization: Changed from O(n*m) to O(1) using lookup maps (jobDepMap, groupDepMap)
  - Reconciliation backoff: Added RequeueAfter: 30*time.Second when workflow is running

  Documentation & Testing

  - Godoc documentation: Added comprehensive comments to API types and controller
  - Unit tests: Added helpers_test.go with tests for all helper functions
  - Integration tests: Added managedjob_controller_test.go with Ginkgo/Gomega tests

* Add the helm chart release.

* Add reasonable test coverage.
2025-12-17 22:33:23 +00:00

153 lines
4.8 KiB
Go

package controllers
import (
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/assert"
)
// TestRecordJobCreated checks that every RecordJobCreated call is reflected
// as one increment of the JobsCreatedTotal series with matching labels.
func TestRecordJobCreated(t *testing.T) {
	// Clear the counter vector before recording anything.
	JobsCreatedTotal.Reset()

	// Each entry is one recorded creation: {namespace, workflow, group}.
	recorded := [][3]string{
		{"default", "test-workflow", "group1"},
		{"default", "test-workflow", "group1"},
		{"production", "other-workflow", "group2"},
	}
	for _, labels := range recorded {
		RecordJobCreated(labels[0], labels[1], labels[2])
	}

	// The first label set was recorded twice, the second once.
	got := testutil.ToFloat64(JobsCreatedTotal.WithLabelValues("default", "test-workflow", "group1"))
	assert.Equal(t, float64(2), got)

	got = testutil.ToFloat64(JobsCreatedTotal.WithLabelValues("production", "other-workflow", "group2"))
	assert.Equal(t, float64(1), got)
}
// TestRecordJobSucceeded checks that RecordJobSucceeded calls accumulate per
// group in JobsSucceededTotal for a single namespace/workflow pair.
func TestRecordJobSucceeded(t *testing.T) {
	// Clear the counter vector before recording anything.
	JobsSucceededTotal.Reset()

	const (
		namespace = "default"
		workflow  = "workflow1"
	)

	// The table drives both the number of recordings and the expected total.
	expectations := []struct {
		group string
		times int
	}{
		{group: "group1", times: 2},
		{group: "group2", times: 1},
	}

	for _, e := range expectations {
		for i := 0; i < e.times; i++ {
			RecordJobSucceeded(namespace, workflow, e.group)
		}
	}

	for _, e := range expectations {
		got := testutil.ToFloat64(JobsSucceededTotal.WithLabelValues(namespace, workflow, e.group))
		assert.Equal(t, float64(e.times), got)
	}
}
// TestRecordJobFailed checks that RecordJobFailed calls accumulate per group
// in JobsFailedTotal for a single namespace/workflow pair.
func TestRecordJobFailed(t *testing.T) {
	// Clear the counter vector before recording anything.
	JobsFailedTotal.Reset()

	// fail records `times` failures for the given group.
	fail := func(group string, times int) {
		for ; times > 0; times-- {
			RecordJobFailed("production", "critical-workflow", group)
		}
	}
	// failures reads back the current counter value for the given group.
	failures := func(group string) float64 {
		return testutil.ToFloat64(JobsFailedTotal.WithLabelValues("production", "critical-workflow", group))
	}

	fail("init", 2)
	fail("cleanup", 1)

	assert.Equal(t, float64(2), failures("init"))
	assert.Equal(t, float64(1), failures("cleanup"))
}
// TestSetActiveJobs checks that SetActiveJobs stores a value per
// namespace/workflow series and that a later call for the same series
// replaces the earlier value.
func TestSetActiveJobs(t *testing.T) {
	// Clear the gauge vector before setting anything.
	ActiveJobs.Reset()

	SetActiveJobs("default", "workflow1", 5)
	SetActiveJobs("production", "workflow2", 3)
	// Second write to default/workflow1 with a lower value; the check below
	// expects this one to be the value that is read back.
	SetActiveJobs("default", "workflow1", 2)

	expected := []struct {
		namespace string
		workflow  string
		value     float64
	}{
		{namespace: "default", workflow: "workflow1", value: 2},
		{namespace: "production", workflow: "workflow2", value: 3},
	}
	for _, e := range expected {
		got := testutil.ToFloat64(ActiveJobs.WithLabelValues(e.namespace, e.workflow))
		assert.Equal(t, e.value, got)
	}
}
// TestMetricsLabels runs one subtest per counter metric, checking that a
// single call to the recorder yields a value of 1 on the series addressed by
// the same (namespace, workflow, group) label values.
func TestMetricsLabels(t *testing.T) {
	type labelCase struct {
		title   string
		labels  [3]string // namespace, workflow, group
		counter *prometheus.CounterVec
		record  func(ns, wf, grp string)
	}

	cases := []labelCase{
		{
			title:   "created_metric_labels",
			labels:  [3]string{"ns1", "wf1", "grp1"},
			counter: JobsCreatedTotal,
			record:  RecordJobCreated,
		},
		{
			title:   "succeeded_metric_labels",
			labels:  [3]string{"ns2", "wf2", "grp2"},
			counter: JobsSucceededTotal,
			record:  RecordJobSucceeded,
		},
		{
			title:   "failed_metric_labels",
			labels:  [3]string{"ns3", "wf3", "grp3"},
			counter: JobsFailedTotal,
			record:  RecordJobFailed,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.title, func(t *testing.T) {
			// Clear the counter vector so only this subtest's recording is counted.
			tc.counter.Reset()

			tc.record(tc.labels[0], tc.labels[1], tc.labels[2])

			got := testutil.ToFloat64(tc.counter.WithLabelValues(tc.labels[:]...))
			assert.Equal(t, float64(1), got)
		})
	}
}
// TestMetricsMultipleNamespaces records one event of each kind in several
// namespaces and checks that every namespace's series reads back as 1.
func TestMetricsMultipleNamespaces(t *testing.T) {
	// Clear all four metric vectors before recording.
	JobsCreatedTotal.Reset()
	JobsSucceededTotal.Reset()
	JobsFailedTotal.Reset()
	ActiveJobs.Reset()

	const (
		workflow = "workflow"
		group    = "group"
	)
	environments := [...]string{"dev", "staging", "production"}

	// Record one created/succeeded/failed event and one active job per namespace.
	for i := range environments {
		ns := environments[i]
		RecordJobCreated(ns, workflow, group)
		RecordJobSucceeded(ns, workflow, group)
		RecordJobFailed(ns, workflow, group)
		SetActiveJobs(ns, workflow, 1)
	}

	// Each namespace's series is expected to hold exactly its own single recording.
	want := float64(1)
	for i := range environments {
		ns := environments[i]

		got := testutil.ToFloat64(JobsCreatedTotal.WithLabelValues(ns, workflow, group))
		assert.Equal(t, want, got)

		got = testutil.ToFloat64(JobsSucceededTotal.WithLabelValues(ns, workflow, group))
		assert.Equal(t, want, got)

		got = testutil.ToFloat64(JobsFailedTotal.WithLabelValues(ns, workflow, group))
		assert.Equal(t, want, got)

		got = testutil.ToFloat64(ActiveJobs.WithLabelValues(ns, workflow))
		assert.Equal(t, want, got)
	}
}
// TestActiveJobsGaugeDecreases walks the default/workflow series through the
// values 0, 5, 3 and 0, checking after each SetActiveJobs call that the
// series reads back the value just set, including when it goes down.
func TestActiveJobsGaugeDecreases(t *testing.T) {
	// Clear the gauge vector before setting anything.
	ActiveJobs.Reset()

	// active reads the current value of the default/workflow series.
	active := func() float64 {
		return testutil.ToFloat64(ActiveJobs.WithLabelValues("default", "workflow"))
	}

	SetActiveJobs("default", "workflow", 0)
	assert.Equal(t, float64(0), active())

	SetActiveJobs("default", "workflow", 5)
	assert.Equal(t, float64(5), active())

	SetActiveJobs("default", "workflow", 3)
	assert.Equal(t, float64(3), active())

	SetActiveJobs("default", "workflow", 0)
	assert.Equal(t, float64(0), active())
}