mirror of
https://github.com/lukaszraczylo/kubernetes-images-sync-operator.git
synced 2026-06-09 23:19:15 +00:00
Clean up the code and basic improvements.
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
@@ -13,8 +14,9 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/utils/pointer"
|
||||
"k8s.io/utils/ptr"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
@@ -28,6 +30,7 @@ type ClusterImageReconciler struct {
|
||||
Scheme *runtime.Scheme
|
||||
MaxParallelJobs int
|
||||
ActiveJobs int
|
||||
activeJobsMu sync.Mutex // protects ActiveJobs counter
|
||||
KubeClient *kubernetes.Clientset
|
||||
}
|
||||
|
||||
@@ -66,7 +69,7 @@ func (r *ClusterImageReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
if err := r.Get(ctx, req.NamespacedName, latest); err != nil {
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
|
||||
latest.Status.Progress = shared.STATUS_PENDING
|
||||
if err := r.Status().Update(ctx, latest); err != nil {
|
||||
if errors.IsConflict(err) {
|
||||
@@ -80,7 +83,10 @@ func (r *ClusterImageReconciler) Reconcile(ctx context.Context, req ctrl.Request
|
||||
}
|
||||
|
||||
// If we've reached the maximum number of parallel jobs, requeue
|
||||
if r.ActiveJobs >= r.MaxParallelJobs && clusterImage.Status.Progress == shared.STATUS_PENDING {
|
||||
r.activeJobsMu.Lock()
|
||||
activeJobs := r.ActiveJobs
|
||||
r.activeJobsMu.Unlock()
|
||||
if activeJobs >= r.MaxParallelJobs && clusterImage.Status.Progress == shared.STATUS_PENDING {
|
||||
return ctrl.Result{RequeueAfter: time.Second * 30}, nil
|
||||
}
|
||||
|
||||
@@ -117,7 +123,7 @@ func (r *ClusterImageReconciler) handlePendingClusterImage(ctx context.Context,
|
||||
if err := r.Get(ctx, types.NamespacedName{Name: clusterImage.Name, Namespace: clusterImage.Namespace}, latest); err != nil {
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
|
||||
latest.Status.Progress = shared.STATUS_PRESENT
|
||||
if err := r.Status().Update(ctx, latest); err != nil {
|
||||
if errors.IsConflict(err) {
|
||||
@@ -151,7 +157,9 @@ func (r *ClusterImageReconciler) handlePendingClusterImage(ctx context.Context,
|
||||
}
|
||||
|
||||
// Increment the active jobs count
|
||||
r.activeJobsMu.Lock()
|
||||
r.ActiveJobs++
|
||||
r.activeJobsMu.Unlock()
|
||||
|
||||
return ctrl.Result{Requeue: true}, nil
|
||||
}
|
||||
@@ -217,7 +225,9 @@ func (r *ClusterImageReconciler) handleRunningClusterImage(ctx context.Context,
|
||||
}
|
||||
|
||||
latest.Status.Progress = shared.STATUS_SUCCESS
|
||||
r.activeJobsMu.Lock()
|
||||
r.ActiveJobs--
|
||||
r.activeJobsMu.Unlock()
|
||||
// Update the status before cleaning up the job
|
||||
if err := r.Status().Update(ctx, latest); err != nil {
|
||||
if errors.IsConflict(err) {
|
||||
@@ -232,7 +242,9 @@ func (r *ClusterImageReconciler) handleRunningClusterImage(ctx context.Context,
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
} else if existingJob.Status.Failed > 0 {
|
||||
r.activeJobsMu.Lock()
|
||||
r.ActiveJobs--
|
||||
r.activeJobsMu.Unlock()
|
||||
if clusterImage.Status.RetryCount < 3 {
|
||||
// Cleanup the failed job before retrying
|
||||
if err := r.cleanupJobAndPods(ctx, existingJob); err != nil {
|
||||
@@ -300,8 +312,22 @@ func (r *ClusterImageReconciler) handleRunningClusterImage(ctx context.Context,
|
||||
return r.updateClusterImageExportStatus(ctx, clusterImage)
|
||||
}
|
||||
func (r *ClusterImageReconciler) cleanupJobAndPods(ctx context.Context, job *v1batch.Job) error {
|
||||
// Add a short delay to allow status updates to propagate
|
||||
time.Sleep(2 * time.Second)
|
||||
// Wait for job status to propagate before deletion
|
||||
jobKey := types.NamespacedName{Name: job.Name, Namespace: job.Namespace}
|
||||
err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (done bool, err error) {
|
||||
currentJob := &v1batch.Job{}
|
||||
if err := r.Get(ctx, jobKey, currentJob); err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
return true, nil // Job already deleted
|
||||
}
|
||||
return false, nil // Retry on transient errors
|
||||
}
|
||||
// Job status has been updated, proceed with deletion
|
||||
return currentJob.Status.Active == 0, nil
|
||||
})
|
||||
if err != nil && !errors.IsNotFound(err) && err != context.DeadlineExceeded {
|
||||
return fmt.Errorf("failed to wait for job status: %w", err)
|
||||
}
|
||||
|
||||
// Delete the job
|
||||
if err := r.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil && !errors.IsNotFound(err) {
|
||||
@@ -374,8 +400,8 @@ func (r *ClusterImageReconciler) createBackupJob(ctx context.Context, clusterIma
|
||||
Kind: clusterImage.Kind,
|
||||
Name: clusterImage.Name,
|
||||
UID: clusterImage.UID,
|
||||
BlockOwnerDeletion: pointer.Bool(true),
|
||||
Controller: pointer.Bool(true),
|
||||
BlockOwnerDeletion: ptr.To(true),
|
||||
Controller: ptr.To(true),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ package raczylocom
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"crypto/md5" // #nosec G501 - MD5 used for non-cryptographic unique identifiers only
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/util/retry"
|
||||
"k8s.io/utils/pointer"
|
||||
"k8s.io/utils/ptr"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
|
||||
@@ -27,7 +27,7 @@ import (
|
||||
// ClusterImageExportReconciler reconciles a ClusterImageExport object
|
||||
type ClusterImageExportReconciler struct {
|
||||
client.Client
|
||||
Scheme *runtime.Scheme
|
||||
Scheme *runtime.Scheme
|
||||
podAnnotations map[string]string
|
||||
}
|
||||
|
||||
@@ -121,6 +121,7 @@ func (r *ClusterImageExportReconciler) Reconcile(ctx context.Context, req ctrl.R
|
||||
|
||||
for _, image := range fullImagesList.Containers {
|
||||
// Include creation timestamp in the hash to differentiate between exports with the same name
|
||||
// #nosec G401 - MD5 used for non-cryptographic unique identifier generation, not security
|
||||
nameHash := fmt.Sprintf("%x", md5.Sum([]byte(clusterImageExport.Name+image.Image+image.Tag+image.Sha+
|
||||
clusterImageExport.Annotations["export.raczylo.com/creation-timestamp"])))[:14]
|
||||
|
||||
@@ -156,7 +157,7 @@ func (r *ClusterImageExportReconciler) Reconcile(ctx context.Context, req ctrl.R
|
||||
Kind: clusterImageExport.Kind,
|
||||
Name: clusterImageExport.Name,
|
||||
UID: clusterImageExport.UID,
|
||||
Controller: pointer.Bool(true),
|
||||
Controller: ptr.To(true),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -185,7 +186,7 @@ func (r *ClusterImageExportReconciler) Reconcile(ctx context.Context, req ctrl.R
|
||||
failedCount := 0
|
||||
pendingCount := 0
|
||||
clusterImageList := &raczylocomv1.ClusterImageList{}
|
||||
if err := r.List(ctx, clusterImageList, client.InNamespace(clusterImageExport.Namespace),
|
||||
if err := r.List(ctx, clusterImageList, client.InNamespace(clusterImageExport.Namespace),
|
||||
client.MatchingFields{"spec.exportName": clusterImageExport.Name}); err != nil {
|
||||
l.Error(err, "unable to list ClusterImages")
|
||||
return ctrl.Result{}, err
|
||||
@@ -250,21 +251,6 @@ func (r *ClusterImageExportReconciler) updateStatusWithRetry(ctx context.Context
|
||||
})
|
||||
}
|
||||
|
||||
func (r *ClusterImageExportReconciler) checkAllClusterImagesCompleted(ctx context.Context, clusterImageExport *raczylocomv1.ClusterImageExport) (bool, error) {
|
||||
clusterImageList := &raczylocomv1.ClusterImageList{}
|
||||
if err := r.List(ctx, clusterImageList, client.InNamespace(clusterImageExport.Namespace), client.MatchingFields{"spec.exportName": clusterImageExport.Name}); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, ci := range clusterImageList.Items {
|
||||
if ci.Status.Progress != shared.STATUS_SUCCESS && ci.Status.Progress != shared.STATUS_PRESENT {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
|
||||
func (r *ClusterImageExportReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
@@ -409,7 +395,7 @@ func (r *ClusterImageExportReconciler) runCleanupJob(ctx context.Context, cluste
|
||||
}
|
||||
|
||||
// Set up the cleanup job with retry limits and TTL
|
||||
backoffLimit := int32(2) // 3 total attempts (initial + 2 retries)
|
||||
backoffLimit := int32(2) // 3 total attempts (initial + 2 retries)
|
||||
ttlSecondsAfterFinished := int32(300) // Delete job 5 minutes after completion
|
||||
|
||||
// Merge annotations from different sources
|
||||
@@ -443,16 +429,16 @@ func (r *ClusterImageExportReconciler) runCleanupJob(ctx context.Context, cluste
|
||||
}
|
||||
|
||||
jobParams := shared.JobParams{
|
||||
Name: normalisedImageName,
|
||||
Namespace: clusterImageExport.Namespace,
|
||||
Image: shared.BACKUP_JOB_IMAGE,
|
||||
Commands: defaultCommands,
|
||||
Annotations: mergedAnnotations,
|
||||
ServiceAccount: "",
|
||||
ImagePullSecrets: clusterImageExport.Spec.ImagePullSecrets,
|
||||
BackoffLimit: &backoffLimit,
|
||||
Name: normalisedImageName,
|
||||
Namespace: clusterImageExport.Namespace,
|
||||
Image: shared.BACKUP_JOB_IMAGE,
|
||||
Commands: defaultCommands,
|
||||
Annotations: mergedAnnotations,
|
||||
ServiceAccount: "",
|
||||
ImagePullSecrets: clusterImageExport.Spec.ImagePullSecrets,
|
||||
BackoffLimit: &backoffLimit,
|
||||
TTLSecondsAfterFinished: &ttlSecondsAfterFinished,
|
||||
EnvVars: envVars,
|
||||
EnvVars: envVars,
|
||||
}
|
||||
|
||||
cleanupJob := shared.CreateJob(jobParams, func(raczylocomv1.ClusterImageExport) []string { return nil })
|
||||
|
||||
Reference in New Issue
Block a user