Ensure that job pods are also removed.

This commit is contained in:
2024-09-11 23:31:06 +01:00
parent cfe481259b
commit 9cf5975def
5 changed files with 41 additions and 26 deletions
-2
View File
@@ -15,7 +15,6 @@ issues:
- path: "internal/*" - path: "internal/*"
linters: linters:
- dupl - dupl
- lll
linters: linters:
disable-all: true disable-all: true
enable: enable:
@@ -30,7 +29,6 @@ linters:
- gosimple - gosimple
- govet - govet
- ineffassign - ineffassign
- lll
- misspell - misspell
- nakedret - nakedret
- prealloc - prealloc
+2 -2
View File
@@ -10,9 +10,9 @@ description: |
type: application type: application
version: 0.1.28 version: 0.1.30
appVersion: "0.1.28" appVersion: "0.1.30"
home: https://github.com/lukaszraczylo/kubernetes-images-sync-operator home: https://github.com/lukaszraczylo/kubernetes-images-sync-operator
@@ -53,6 +53,10 @@ spec:
spec: spec:
description: ClusterImageExportSpec defines the desired state of ClusterImageExport description: ClusterImageExportSpec defines the desired state of ClusterImageExport
properties: properties:
additionalImages:
items:
type: string
type: array
basePath: basePath:
description: Base path for the export - both file and S3 description: Base path for the export - both file and S3
maxLength: 255 maxLength: 255
+1 -1
View File
@@ -12,7 +12,7 @@ sa:
- ALL - ALL
image: image:
repository: ghcr.io/lukaszraczylo/kubernetes-images-sync-operator repository: ghcr.io/lukaszraczylo/kubernetes-images-sync-operator
tag: 0.1.28 tag: 0.1.30
resources: resources:
limits: limits:
cpu: 500m cpu: 500m
@@ -14,6 +14,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/utils/pointer" "k8s.io/utils/pointer"
ctrl "sigs.k8s.io/controller-runtime" ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client"
@@ -28,6 +29,7 @@ type ClusterImageReconciler struct {
Scheme *runtime.Scheme Scheme *runtime.Scheme
MaxParallelJobs int MaxParallelJobs int
ActiveJobs int ActiveJobs int
KubeClient *kubernetes.Clientset
} }
// +kubebuilder:rbac:groups=raczylo.com,resources=*,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=raczylo.com,resources=*,verbs=get;list;watch;create;update;patch;delete
@@ -153,12 +155,16 @@ func (r *ClusterImageReconciler) handleRunningClusterImage(ctx context.Context,
if existingJob.Status.Succeeded > 0 { if existingJob.Status.Succeeded > 0 {
clusterImage.Status.Progress = shared.STATUS_SUCCESS clusterImage.Status.Progress = shared.STATUS_SUCCESS
r.ActiveJobs-- r.ActiveJobs--
if err := r.cleanupJobAndPods(ctx, existingJob); err != nil {
l.Error(err, "unable to cleanup job and pods")
return ctrl.Result{}, err
}
} else if existingJob.Status.Failed > 0 { } else if existingJob.Status.Failed > 0 {
if clusterImage.Status.RetryCount < 3 { if clusterImage.Status.RetryCount < 3 {
clusterImage.Status.Progress = shared.STATUS_RETRYING clusterImage.Status.Progress = shared.STATUS_RETRYING
clusterImage.Status.RetryCount++ clusterImage.Status.RetryCount++
if err := r.Delete(ctx, existingJob); err != nil { if err := r.cleanupJobAndPods(ctx, existingJob); err != nil {
l.Error(err, "unable to delete failed job for retry") l.Error(err, "unable to cleanup failed job and pods for retry")
return ctrl.Result{}, err return ctrl.Result{}, err
} }
r.ActiveJobs-- r.ActiveJobs--
@@ -166,6 +172,10 @@ func (r *ClusterImageReconciler) handleRunningClusterImage(ctx context.Context,
} else { } else {
clusterImage.Status.Progress = shared.STATUS_FAILED clusterImage.Status.Progress = shared.STATUS_FAILED
r.ActiveJobs-- r.ActiveJobs--
if err := r.cleanupJobAndPods(ctx, existingJob); err != nil {
l.Error(err, "unable to cleanup failed job and pods")
return ctrl.Result{}, err
}
} }
} }
@@ -180,29 +190,25 @@ func (r *ClusterImageReconciler) handleRunningClusterImage(ctx context.Context,
return ctrl.Result{}, err return ctrl.Result{}, err
} }
// Delete the completed job
if clusterImage.Status.Progress == shared.STATUS_SUCCESS || clusterImage.Status.Progress == shared.STATUS_FAILED {
if err := r.Delete(ctx, existingJob); err != nil && !errors.IsNotFound(err) {
l.Error(err, "unable to delete completed job")
return ctrl.Result{}, err
}
}
// l.Info("Reconciling ClusterImage completed", "Name", clusterImage.Name, "Status", clusterImage.Status.Progress)
return r.updateClusterImageExportStatus(ctx, clusterImage) return r.updateClusterImageExportStatus(ctx, clusterImage)
} }
func (r *ClusterImageReconciler) cleanupJobPods(ctx context.Context, job *v1batch.Job) error { func (r *ClusterImageReconciler) cleanupJobAndPods(ctx context.Context, job *v1batch.Job) error {
podList := &v1.PodList{} // Delete the job
if err := r.List(ctx, podList, client.InNamespace(job.Namespace), client.MatchingLabels(job.Spec.Selector.MatchLabels)); err != nil { if err := r.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil && !errors.IsNotFound(err) {
return err return fmt.Errorf("failed to delete job: %w", err)
} }
for _, pod := range podList.Items { // Delete the associated pods
if err := r.Delete(ctx, &pod); err != nil && !errors.IsNotFound(err) { labelSelector := metav1.LabelSelector{
return err MatchLabels: job.Spec.Selector.MatchLabels,
} }
listOptions := metav1.ListOptions{
LabelSelector: metav1.FormatLabelSelector(&labelSelector),
}
if err := r.KubeClient.CoreV1().Pods(job.Namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, listOptions); err != nil {
return fmt.Errorf("failed to delete pods: %w", err)
} }
return nil return nil
@@ -355,12 +361,19 @@ func (r *ClusterImageReconciler) handleJobRestarts(ctx context.Context, job *v1b
// SetupWithManager sets up the controller with the Manager. // SetupWithManager sets up the controller with the Manager.
func (r *ClusterImageReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *ClusterImageReconciler) SetupWithManager(mgr ctrl.Manager) error {
// Create a Kubernetes clientset
var err error
config := mgr.GetConfig()
r.KubeClient, err = kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("unable to create Kubernetes client: %w", err)
}
return ctrl.NewControllerManagedBy(mgr). return ctrl.NewControllerManagedBy(mgr).
For(&raczylocomv1.ClusterImage{}). For(&raczylocomv1.ClusterImage{}).
Owns(&v1batch.Job{}). Owns(&v1batch.Job{}).
Complete(r) Complete(r)
} }
func (r *ClusterImageReconciler) removeAllJobsAndContainers(ctx context.Context, namespace string) error { func (r *ClusterImageReconciler) removeAllJobsAndContainers(ctx context.Context, namespace string) error {
jobList := &v1batch.JobList{} jobList := &v1batch.JobList{}
if err := r.List(ctx, jobList, client.InNamespace(namespace), client.MatchingLabels{"app": "image-export"}); err != nil { if err := r.List(ctx, jobList, client.InNamespace(namespace), client.MatchingLabels{"app": "image-export"}); err != nil {