Multiple fixes (#29)

* Multiple fixes

- add goreleaser to the build / release process
- add kubectl plugin for job graphs visualization
- add installation scripts
- update dependencies

* Update the release & CRD content.

* Next set of improvements.

  Code Quality

  - Label constants: Added LabelWorkflowName, LabelGroupName, LabelJobName, LabelJobID in controllers/definitions.go
  - Removed commented debug code: Cleaned up dead code from multiple files
  - Removed unused dependencyTree field: Cleaned connPackage struct
  - Fixed snake_case variables: Changed to camelCase (runGroup, groupDep, runJob, jobDep, k8sJob)

  Kubernetes Best Practices

  - Finalizers: Implemented handleDeletion() and deleteChildJobs() for proper cleanup
  - Status enum validation: Added +kubebuilder:validation:Enum=pending;running;succeeded;failed;aborted
  - ImagePullPolicy default: Created getImagePullPolicy() helper that defaults to IfNotPresent
  - Resource limits support: Added Resources *corev1.ResourceRequirements to ManagedJobParameters

  Observability

  - Prometheus metrics: Created controllers/metrics.go with counters (jobs created/succeeded/failed), histogram (reconciliation duration), and gauge (active jobs)
  - Structured logging: Added logger field to connPackage, used context-based logging throughout

  Configuration

  - Leader election ID: Made configurable via --leader-election-id flag
  - Development mode: Made configurable via --dev-mode flag and LOG_LEVEL env var

  Performance

  - Dependency lookup optimization: Changed from O(n*m) to O(1) using lookup maps (jobDepMap, groupDepMap)
  - Reconciliation backoff: Added RequeueAfter: 30*time.Second when workflow is running

  Documentation & Testing

  - Godoc documentation: Added comprehensive comments to API types and controller
  - Unit tests: Added helpers_test.go with tests for all helper functions
  - Integration tests: Added managedjob_controller_test.go with Ginkgo/Gomega tests

* Add the helm chart release.

* Add reasonable test coverage.
This commit is contained in:
2025-12-17 21:18:04 +00:00
parent b6ce5b7c98
commit 2b36071647
43 changed files with 16182 additions and 8179 deletions
+53 -3
View File
@@ -21,106 +21,156 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// ManagedJobDependencies describes a single dependency edge: the name of the
// job or group that is depended upon, together with the execution status
// recorded for that dependency.
type ManagedJobDependencies struct {
	// Name is the identifier of the dependency (job or group name).
	// The schema marks it optional and defaults it to the empty string.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=""
	Name string `json:"name"`

	// Status tracks the execution status of the dependency.
	// The schema restricts it to one of: pending, running, succeeded, failed, aborted.
	// +kubebuilder:validation:Enum=pending;running;succeeded;failed;aborted
	Status string `json:"status"`
}
// ManagedJobDefinition defines a single job within a group: the container
// image and arguments to run, its job-level parameter overrides, the jobs it
// depends on, and its tracked execution status.
type ManagedJobDefinition struct {
	// Name is the unique identifier for this job within the group.
	// At most 40 characters, consisting only of lowercase letters, digits and '-'.
	// The pattern is anchored with ^ and $ because CRD schema patterns are
	// unanchored regular expressions: without the anchors any value that merely
	// contains one allowed character would pass validation.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MaxLength=40
	// +kubebuilder:validation:Pattern=`^[a-z0-9-]+$`
	Name string `json:"name"`

	// Parallel indicates if this job can run in parallel with others in the group.
	// Defaults to false.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=false
	Parallel bool `json:"parallel"`

	// Image is the container image to run for this job (at least 5 characters).
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinLength=5
	Image string `json:"image"`

	// Args are the command-line arguments to pass to the container.
	// +kubebuilder:validation:Optional
	Args []string `json:"args,omitempty"`

	// Params contains job-specific parameters that override group and spec-level params.
	// +kubebuilder:validation:Optional
	Params ManagedJobParameters `json:"params"`

	// Status tracks the execution status of this job.
	// Defaults to pending; restricted to pending, running, succeeded, failed or aborted.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=pending
	// +kubebuilder:validation:Enum=pending;running;succeeded;failed;aborted
	Status string `json:"status"`

	// Dependencies lists the jobs that must complete before this job can run.
	// +kubebuilder:validation:Optional
	// +optional
	Dependencies []*ManagedJobDependencies `json:"dependencies"`

	// CompiledParams contains the merged parameters from spec, group, and job levels.
	// +optional
	CompiledParams ManagedJobParameters `json:"compiledParams"`
}
// ManagedJobGroup defines a named group of jobs that are executed together,
// along with group-level parameter overrides, the groups it depends on, and
// its tracked execution status.
type ManagedJobGroup struct {
	// Name is the unique identifier for this group within the workflow.
	// At most 40 characters, consisting only of lowercase letters, digits and '-'.
	// The pattern is anchored with ^ and $ because CRD schema patterns are
	// unanchored regular expressions: without the anchors any value that merely
	// contains one allowed character would pass validation.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MaxLength=40
	// +kubebuilder:validation:Pattern=`^[a-z0-9-]+$`
	Name string `json:"name"`

	// Parallel indicates if this group can run in parallel with other groups.
	// Defaults to false.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=false
	Parallel bool `json:"parallel"`

	// Jobs is the list of jobs to execute within this group (at least one).
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinItems=1
	Jobs []*ManagedJobDefinition `json:"jobs"`

	// Params contains group-level parameters that override spec-level params.
	// +kubebuilder:validation:Optional
	Params ManagedJobParameters `json:"params"`

	// Dependencies lists the groups that must complete before this group can run.
	// +kubebuilder:validation:Optional
	// +optional
	Dependencies []*ManagedJobDependencies `json:"dependencies"`

	// Status tracks the execution status of this group.
	// Defaults to pending; restricted to pending, running, succeeded, failed or aborted.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=pending
	// +kubebuilder:validation:Enum=pending;running;succeeded;failed;aborted
	Status string `json:"status"`
}
// ManagedJobParameters defines common parameters that can be set at spec, group, or job level.
// Parameters at lower levels override those at higher levels.
// Every field is optional in the schema and is omitted from JSON when empty.
type ManagedJobParameters struct {
	// FromEnv specifies environment variable sources (ConfigMaps, Secrets).
	// +kubebuilder:validation:Optional
	FromEnv []corev1.EnvFromSource `json:"fromEnv,omitempty"`

	// Env specifies individual environment variables.
	// +kubebuilder:validation:Optional
	Env []corev1.EnvVar `json:"env,omitempty"`

	// Volumes specifies volumes to mount in job pods.
	// +kubebuilder:validation:Optional
	Volumes []corev1.Volume `json:"volumes,omitempty"`

	// VolumeMounts specifies where to mount volumes in containers.
	// NOTE(review): the JSON key is the singular "volumeMount" although the Go
	// field is plural; renaming it would change the serialized API, so confirm
	// with existing manifests before touching it.
	// +kubebuilder:validation:Optional
	VolumeMounts []corev1.VolumeMount `json:"volumeMount,omitempty"`

	// ServiceAccount is the Kubernetes service account to use for job pods.
	// +kubebuilder:validation:Optional
	ServiceAccount string `json:"serviceAccount,omitempty"`

	// RestartPolicy defines the pod restart policy (Never, OnFailure).
	// The schema defaults it to OnFailure.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=OnFailure
	// +kubebuilder:validation:Enum=Never;OnFailure
	RestartPolicy string `json:"restartPolicy,omitempty"`

	// ImagePullSecrets are references to secrets for pulling private images.
	// +kubebuilder:validation:Optional
	ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"`

	// ImagePullPolicy defines when to pull the container image
	// (Always, Never or IfNotPresent). No schema default is declared here.
	// NOTE(review): presumably the controller supplies a fallback when this is
	// empty — verify in the controller code.
	// +kubebuilder:validation:Optional
	// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
	ImagePullPolicy string `json:"imagePullPolicy,omitempty"`

	// Labels are additional labels to apply to job pods.
	// +kubebuilder:validation:Optional
	Labels map[string]string `json:"labels,omitempty"`

	// Annotations are additional annotations to apply to job pods.
	// +kubebuilder:validation:Optional
	Annotations map[string]string `json:"annotations,omitempty"`

	// Resources specifies compute resources for the job container.
	// It is a pointer, so an unset value is nil.
	// +kubebuilder:validation:Optional
	Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
}
// ManagedJobSpec defines the desired state of ManagedJob: the retry budget,
// the job groups that make up the workflow, and the spec-level parameters
// shared by all jobs.
type ManagedJobSpec struct {
	// Retries is the number of times to retry failed jobs.
	// Allowed range is 0 to 100 inclusive; the schema default is 1.
	// A single Minimum marker is kept so the lower bound is unambiguous.
	// +kubebuilder:validation:Required
	// +kubebuilder:default=1
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=100
	Retries int `json:"retries"`

	// Groups is the list of job groups to execute in this workflow (at least one).
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinItems=1
	Groups []*ManagedJobGroup `json:"groups"`

	// Params contains spec-level parameters that apply to all jobs.
	// +kubebuilder:validation:Optional
	Params ManagedJobParameters `json:"params"`
}
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
// ManagedJob is the Schema for the managedjobs API.
// It defines a workflow consisting of groups of jobs with dependencies.
// The print columns above expose the top-level status string and the
// object's creation timestamp.
type ManagedJob struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec defines the desired workflow configuration.
	Spec ManagedJobSpec `json:"spec,omitempty"`

	// Status tracks the overall execution status of the workflow.
	// It is a plain string at the top level of the object (not a nested struct),
	// defaults to pending, and is restricted to pending, running, succeeded,
	// failed or aborted.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=pending
	// +kubebuilder:validation:Enum=pending;running;succeeded;failed;aborted
	Status string `json:"status"`
}
+5 -1
View File
@@ -1,5 +1,4 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2023.
@@ -220,6 +219,11 @@ func (in *ManagedJobParameters) DeepCopyInto(out *ManagedJobParameters) {
(*out)[key] = val
}
}
if in.Resources != nil {
in, out := &in.Resources, &out.Resources
*out = new(v1.ResourceRequirements)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManagedJobParameters.