From dbb673107ca1ebcd72ec1eb920c5406b8ad96a60 Mon Sep 17 00:00:00 2001 From: Lukasz Raczylo Date: Thu, 18 Dec 2025 01:23:16 +0000 Subject: [PATCH] More fixes, moving from python to golang worker. --- .github/workflows/release.yaml | 40 - .goreleaser.yaml | 42 +- .../Dockerfile => Dockerfile.worker | 18 +- README.md | 85 +- .../v1/clusterimageexport_types.go | 25 + cmd/worker/main.go | 369 +++++ .../raczylo.com_clusterimageexports.yaml | 32 + docker-image-worker/cleanup.py | 67 - docker-image-worker/export.py | 106 -- docker-image-worker/requirements.txt | 4 - docker-image-worker/s3_utils.py | 228 --- go.mod | 23 +- go.sum | 38 + .../raczylo.com/clusterimage_controller.go | 4 +- .../clusterimageexport_controller.go | 139 +- .../raczylo.com/controller_unit_test.go | 1031 ++++++++++++++ internal/shared/definitions_test.go | 643 +++++++++ internal/shared/jobs_test.go | 547 ++++++++ internal/shared/k8s_test.go | 1219 +++++++++++++++++ 19 files changed, 4202 insertions(+), 458 deletions(-) rename docker-image-worker/Dockerfile => Dockerfile.worker (73%) create mode 100644 cmd/worker/main.go delete mode 100755 docker-image-worker/cleanup.py delete mode 100755 docker-image-worker/export.py delete mode 100644 docker-image-worker/requirements.txt delete mode 100644 docker-image-worker/s3_utils.py create mode 100644 internal/controller/raczylo.com/controller_unit_test.go create mode 100644 internal/shared/definitions_test.go create mode 100644 internal/shared/jobs_test.go create mode 100644 internal/shared/k8s_test.go diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 8709630..269664a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -24,46 +24,6 @@ jobs: docker-enabled: true secrets: inherit - build-worker-image: - needs: release - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get release version - id: version - run: | - VERSION=$(git 
describe --tags --abbrev=0 2>/dev/null || echo "0.0.0") - VERSION=${VERSION#v} - echo "version=$VERSION" >> $GITHUB_OUTPUT - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push worker image - uses: docker/build-push-action@v5 - with: - context: ./docker-image-worker - file: ./docker-image-worker/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: | - ghcr.io/lukaszraczylo/kubernetes-images-sync-worker:${{ steps.version.outputs.version }} - ghcr.io/lukaszraczylo/kubernetes-images-sync-worker:latest - publish-helm-chart: needs: release runs-on: ubuntu-latest diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 3dcdeac..f4b8a0a 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -34,6 +34,22 @@ builds: - amd64 - arm64 + - id: worker + main: ./cmd/worker + binary: worker + env: + - CGO_ENABLED=0 + flags: + - -trimpath + ldflags: + - -s -w + - -X main.Version={{.Version}} + goos: + - linux + goarch: + - amd64 + - arm64 + archives: - id: default formats: @@ -97,7 +113,8 @@ release: ``` dockers_v2: - - ids: + - id: operator + ids: - manager images: - "ghcr.io/lukaszraczylo/kubernetes-images-sync-operator" @@ -114,6 +131,29 @@ dockers_v2: "org.opencontainers.image.source": "https://github.com/lukaszraczylo/kubernetes-images-sync-operator" "org.opencontainers.image.description": "Kubernetes operator for backing up and syncing container images" + - id: worker + ids: + - worker + images: + - "ghcr.io/lukaszraczylo/kubernetes-images-sync-worker" + tags: + - "{{ .Version }}" + - "latest" + platforms: + - linux/amd64 + - linux/arm64 + dockerfile: Dockerfile.worker + extra_files: + - docker-image-worker/storage.conf + - docker-image-worker/containers.conf + - docker-image-worker/registries.conf + - 
docker-image-worker/podman-preauth.sh + labels: + "org.opencontainers.image.title": "kubernetes-images-sync-worker" + "org.opencontainers.image.version": "{{ .Version }}" + "org.opencontainers.image.source": "https://github.com/lukaszraczylo/kubernetes-images-sync-operator" + "org.opencontainers.image.description": "Worker image for backing up container images to S3 or local storage" + signs: - cmd: cosign signature: "${artifact}.sigstore.json" diff --git a/docker-image-worker/Dockerfile b/Dockerfile.worker similarity index 73% rename from docker-image-worker/Dockerfile rename to Dockerfile.worker index f78befc..1f5b64b 100644 --- a/docker-image-worker/Dockerfile +++ b/Dockerfile.worker @@ -7,7 +7,6 @@ ARG TARGETARCH RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ gnupg2 \ - python3-pip \ sudo \ jq \ && rm -rf /var/lib/apt/lists/* @@ -30,11 +29,18 @@ RUN adduser --disabled-password --gecos "" --uid 1001 runner \ WORKDIR /home/runner -COPY storage.conf containers.conf registries.conf /home/runner/.config/containers/ -COPY requirements.txt export.py cleanup.py s3_utils.py podman-preauth.sh ./ +# Copy container configuration files +COPY docker-image-worker/storage.conf docker-image-worker/containers.conf docker-image-worker/registries.conf /home/runner/.config/containers/ + +# Copy the entrypoint script +COPY docker-image-worker/podman-preauth.sh ./ + +# Copy the worker binary (from goreleaser build context) +COPY $TARGETPLATFORM/worker ./ + USER runner RUN sudo chown -R runner:runner /home/runner/.config \ - && python3 -m pip install --no-cache-dir --only-binary=:all: -r requirements.txt \ - && sudo chmod +x podman-preauth.sh + && sudo chmod +x podman-preauth.sh worker + ENTRYPOINT ["/home/runner/podman-preauth.sh"] -CMD ["bash", "-c"] \ No newline at end of file +CMD ["bash", "-c"] diff --git a/README.md b/README.md index 9b9e284..6c7c05f 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,13 @@ helm install raczylo/kube-images-sync Please 
remember that backups are triggered whenever the new object appears -``` +```yaml apiVersion: raczylo.com/v1 kind: ClusterImageExport metadata: name: backup-20240901 spec: + name: backup-20240901 jobAnnotations: my-fancy-export: 11-09-2024 # Excludes will remove all images with listed wording from the backup list @@ -68,6 +69,88 @@ spec: maxConcurrentJobs: 1 ``` +## Automatic Cleanup (TTL & Retention) + +To prevent old exports from accumulating, you can configure automatic cleanup using TTL (time-based) or retention policies (count-based). + +> **WARNING**: When a ClusterImageExport is deleted, the actual backed up images in storage are also deleted. Make sure your retention settings align with your backup requirements. + +### TTL-based cleanup + +Delete exports after a specified number of days: + +```yaml +apiVersion: raczylo.com/v1 +kind: ClusterImageExport +metadata: + name: daily-backup-2024-12-18 +spec: + name: daily-backup + basePath: /backups/daily + storage: + target: S3 + s3: + bucket: my-backup-bucket + region: eu-west-1 + useRole: true + maxConcurrentJobs: 5 + # Delete this backup 30 days after completion + ttlDaysAfterFinished: 30 +``` + +### Retention-based cleanup + +Keep only the last N successful/failed exports per base path: + +```yaml +apiVersion: raczylo.com/v1 +kind: ClusterImageExport +metadata: + name: weekly-backup-2024-w51 +spec: + name: weekly-backup + basePath: /backups/weekly + storage: + target: S3 + s3: + bucket: my-backup-bucket + region: eu-west-1 + useRole: true + maxConcurrentJobs: 5 + # Keep the last 12 successful backups (3 months of weekly backups) + # Keep only the last 2 failed backups for debugging + retention: + maxSuccessful: 12 + maxFailed: 2 +``` + +### Combined TTL + Retention + +You can use both policies together. 
The export will be deleted when either condition is met: + +```yaml +apiVersion: raczylo.com/v1 +kind: ClusterImageExport +metadata: + name: monthly-backup-2024-12 +spec: + name: monthly-backup + basePath: /backups/monthly + storage: + target: S3 + s3: + bucket: my-backup-bucket + region: eu-west-1 + useRole: true + maxConcurrentJobs: 10 + # Keep backups for up to 1 year + ttlDaysAfterFinished: 365 + # But also limit to last 12 monthly backups + retention: + maxSuccessful: 12 + maxFailed: 1 +``` + ## Worth knowing * If you provide roleARN, you also need to set the useRole to true. diff --git a/api/raczylo.com/v1/clusterimageexport_types.go b/api/raczylo.com/v1/clusterimageexport_types.go index 3d1d56c..1a18478 100644 --- a/api/raczylo.com/v1/clusterimageexport_types.go +++ b/api/raczylo.com/v1/clusterimageexport_types.go @@ -21,6 +21,18 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// RetentionPolicy defines how many completed ClusterImageExport resources to keep +type RetentionPolicy struct { + // Maximum number of successful exports to keep + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=3 + MaxSuccessful *int32 `json:"maxSuccessful,omitempty"` + // Maximum number of failed exports to keep + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=1 + MaxFailed *int32 `json:"maxFailed,omitempty"` +} + type ClusterImageStorageS3 struct { // Bucket name Bucket string `json:"bucket"` @@ -72,6 +84,17 @@ type ClusterImageExportSpec struct { // +kubebuilder:default=5 MaxConcurrentJobs int `json:"maxConcurrentJobs"` AdditionalImages []string `json:"additionalImages,omitempty"` + // TTLDaysAfterFinished specifies how many days to keep completed exports. + // If set, the export (and its backed up images) will be deleted after this many days. + // WARNING: Deletion removes both the CRD and the actual backed up images from storage. 
+ // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Optional + TTLDaysAfterFinished *int32 `json:"ttlDaysAfterFinished,omitempty"` + // Retention specifies how many completed exports to keep per base path. + // Oldest exports beyond this limit will be deleted (including their backed up images). + // WARNING: Deletion removes both the CRD and the actual backed up images from storage. + // +kubebuilder:validation:Optional + Retention *RetentionPolicy `json:"retention,omitempty"` } // ClusterImageExportStatus defines the observed state of ClusterImageExport @@ -81,6 +104,8 @@ type ClusterImageExportStatus struct { TotalImages int `json:"totalImages,omitempty"` // Number of images that have completed export CompletedImages int `json:"completedImages,omitempty"` + // CompletedAt is the timestamp when the export completed (SUCCESS or FAILED) + CompletedAt *metav1.Time `json:"completedAt,omitempty"` } // +kubebuilder:object:root=true diff --git a/cmd/worker/main.go b/cmd/worker/main.go new file mode 100644 index 0000000..c61b557 --- /dev/null +++ b/cmd/worker/main.go @@ -0,0 +1,369 @@ +package main + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/credentials/stscreds" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/aws/aws-sdk-go-v2/service/sts" + "github.com/spf13/cobra" +) + +var ( + // Global flags + useRole bool + useCurrentRole bool + roleName string + awsAccessKeyID string + awsSecretKey string + endpointURL string + region string + maxRetries int + retryDelay time.Duration +) + +func main() { + rootCmd := &cobra.Command{ + Use: "worker", + Short: "Kubernetes Images Sync Worker", + Long: "Worker for backing up container images to S3 or local storage", + } + + // Add global flags + 
rootCmd.PersistentFlags().BoolVar(&useRole, "use_role", false, "Use IAM role for authentication") + rootCmd.PersistentFlags().BoolVar(&useCurrentRole, "use_current_role", false, "Use current AWS role (e.g., from Kubernetes service account)") + rootCmd.PersistentFlags().StringVar(&roleName, "role_name", "", "The name of the IAM role to assume (only when --use_role is set)") + rootCmd.PersistentFlags().StringVar(&awsAccessKeyID, "aws_access_key_id", "", "AWS access key ID") + rootCmd.PersistentFlags().StringVar(&awsSecretKey, "aws_secret_access_key", "", "AWS secret access key") + rootCmd.PersistentFlags().StringVar(&endpointURL, "endpoint_url", "", "S3-compatible endpoint URL") + rootCmd.PersistentFlags().StringVar(&region, "region", "", "AWS region") + rootCmd.PersistentFlags().IntVar(&maxRetries, "max_retries", 5, "Maximum number of retries") + rootCmd.PersistentFlags().DurationVar(&retryDelay, "retry_delay", 5*time.Second, "Delay between retries") + + // Add commands + rootCmd.AddCommand(exportCmd()) + rootCmd.AddCommand(cleanupCmd()) + + if err := rootCmd.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func exportCmd() *cobra.Command { + return &cobra.Command{ + Use: "export <source> <destination>", + Short: "Export a file to S3 or local destination", + Long: "Transfer a file from a local source to either a local destination or an S3 bucket", + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + source := args[0] + destination := args[1] + return runExport(source, destination) + }, + } +} + +func cleanupCmd() *cobra.Command { + return &cobra.Command{ + Use: "cleanup <destination>", + Short: "Remove a directory from S3 or local filesystem", + Long: "Remove a directory recursively, either local or in an S3 bucket", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + destination := args[0] + return runCleanup(destination) + }, + } +} + +func runExport(source, destination string) error { + // 
Check if source file exists + if _, err := os.Stat(source); os.IsNotExist(err) { + return fmt.Errorf("source file '%s' does not exist", source) + } + + var lastErr error + for attempt := 1; attempt <= maxRetries; attempt++ { + if attempt > 1 { + fmt.Printf("Retry attempt %d/%d after %v\n", attempt, maxRetries, retryDelay) + time.Sleep(retryDelay) + } + + var err error + if strings.HasPrefix(destination, "s3://") { + err = uploadToS3(source, destination) + } else { + err = copyLocal(source, destination) + } + + if err == nil { + fmt.Printf("Transfer completed successfully: %s -> %s\n", source, destination) + return nil + } + lastErr = err + fmt.Printf("Attempt %d failed: %v\n", attempt, err) + } + + return fmt.Errorf("transfer failed after %d attempts: %w", maxRetries, lastErr) +} + +func runCleanup(destination string) error { + var lastErr error + for attempt := 1; attempt <= maxRetries; attempt++ { + if attempt > 1 { + fmt.Printf("Retry attempt %d/%d after %v\n", attempt, maxRetries, retryDelay) + time.Sleep(retryDelay) + } + + var err error + if strings.HasPrefix(destination, "s3://") { + err = deleteFromS3(destination) + } else { + err = deleteLocal(destination) + } + + if err == nil { + fmt.Printf("Cleanup completed successfully: %s\n", destination) + return nil + } + lastErr = err + fmt.Printf("Attempt %d failed: %v\n", attempt, err) + } + + return fmt.Errorf("cleanup failed after %d attempts: %w", maxRetries, lastErr) +} + +func getS3Client(ctx context.Context) (*s3.Client, error) { + var cfg aws.Config + var err error + + // Determine region + awsRegion := region + if awsRegion == "" { + awsRegion = os.Getenv("AWS_REGION") + } + if awsRegion == "" { + awsRegion = os.Getenv("AWS_DEFAULT_REGION") + } + + // Build config options + optFns := []func(*config.LoadOptions) error{} + + if awsRegion != "" { + optFns = append(optFns, config.WithRegion(awsRegion)) + } + + // Load base config + cfg, err = config.LoadDefaultConfig(ctx, optFns...) 
+ if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Handle authentication methods + if awsAccessKeyID != "" && awsSecretKey != "" { + // Use explicit credentials + fmt.Println("Using explicit AWS credentials") + cfg.Credentials = credentials.NewStaticCredentialsProvider(awsAccessKeyID, awsSecretKey, "") + } else if useRole && roleName != "" { + // Assume specific role + fmt.Printf("Attempting to assume role: %s\n", roleName) + stsClient := sts.NewFromConfig(cfg) + + // Get account ID for role ARN + identity, err := stsClient.GetCallerIdentity(ctx, &sts.GetCallerIdentityInput{}) + if err != nil { + return nil, fmt.Errorf("failed to get caller identity: %w", err) + } + + roleARN := fmt.Sprintf("arn:aws:iam::%s:role/%s", *identity.Account, roleName) + cfg.Credentials = stscreds.NewAssumeRoleProvider(stsClient, roleARN) + } else if useCurrentRole { + // Use current role (default credential chain handles this) + fmt.Println("Using current role from environment") + // The default config already uses the credential chain which includes + // web identity token if AWS_WEB_IDENTITY_TOKEN_FILE is set + } else { + fmt.Println("Using default credential provider chain") + } + + // Create S3 client options + s3Opts := []func(*s3.Options){} + if endpointURL != "" { + s3Opts = append(s3Opts, func(o *s3.Options) { + o.BaseEndpoint = aws.String(endpointURL) + o.UsePathStyle = true // Required for most S3-compatible services + }) + } + + return s3.NewFromConfig(cfg, s3Opts...), nil +} + +func parseS3Path(s3Path string) (bucket, key string) { + path := strings.TrimPrefix(s3Path, "s3://") + parts := strings.SplitN(path, "/", 2) + bucket = parts[0] + if len(parts) > 1 { + key = parts[1] + } + return +} + +func uploadToS3(source, destination string) error { + ctx := context.Background() + + client, err := getS3Client(ctx) + if err != nil { + return fmt.Errorf("failed to create S3 client: %w", err) + } + + bucket, key := parseS3Path(destination) + + 
file, err := os.Open(source) // #nosec G304 -- source path is provided by operator via CLI args + if err != nil { + return fmt.Errorf("failed to open source file: %w", err) + } + defer file.Close() + + fmt.Printf("Uploading %s to s3://%s/%s\n", source, bucket, key) + + _, err = client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Body: file, + }) + if err != nil { + return fmt.Errorf("failed to upload to S3: %w", err) + } + + return nil +} + +func copyLocal(source, destination string) error { + // Create destination directory if it doesn't exist + destDir := filepath.Dir(destination) + if err := os.MkdirAll(destDir, 0750); err != nil { // #nosec G301 -- restricted permissions for backup directory + return fmt.Errorf("failed to create destination directory: %w", err) + } + + // Open source file + srcFile, err := os.Open(source) // #nosec G304 -- source path is provided by operator via CLI args + if err != nil { + return fmt.Errorf("failed to open source file: %w", err) + } + defer srcFile.Close() + + // Get source file info for permissions + srcInfo, err := srcFile.Stat() + if err != nil { + return fmt.Errorf("failed to stat source file: %w", err) + } + + // Create destination file + dstFile, err := os.OpenFile(destination, os.O_RDWR|os.O_CREATE|os.O_TRUNC, srcInfo.Mode()) // #nosec G304 -- destination path is provided by operator via CLI args + if err != nil { + return fmt.Errorf("failed to create destination file: %w", err) + } + defer dstFile.Close() + + // Copy content + if _, err := io.Copy(dstFile, srcFile); err != nil { + return fmt.Errorf("failed to copy file content: %w", err) + } + + fmt.Printf("Copied %s to %s\n", source, destination) + return nil +} + +func deleteFromS3(destination string) error { + ctx := context.Background() + + client, err := getS3Client(ctx) + if err != nil { + return fmt.Errorf("failed to create S3 client: %w", err) + } + + bucket, prefix := parseS3Path(destination) + + 
fmt.Printf("Deleting objects from s3://%s/%s\n", bucket, prefix) + + // List and delete objects + paginator := s3.NewListObjectsV2Paginator(client, &s3.ListObjectsV2Input{ + Bucket: aws.String(bucket), + Prefix: aws.String(prefix), + }) + + totalDeleted := 0 + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return fmt.Errorf("failed to list objects: %w", err) + } + + if len(page.Contents) == 0 { + continue + } + + // Build list of objects to delete + var objectsToDelete []string + for _, obj := range page.Contents { + objectsToDelete = append(objectsToDelete, *obj.Key) + } + + // Delete objects in batches of 1000 (S3 limit) + for i := 0; i < len(objectsToDelete); i += 1000 { + end := i + 1000 + if end > len(objectsToDelete) { + end = len(objectsToDelete) + } + + batch := objectsToDelete[i:end] + deleteObjects := make([]types.ObjectIdentifier, len(batch)) + for j, key := range batch { + deleteObjects[j] = types.ObjectIdentifier{Key: aws.String(key)} + } + + _, err := client.DeleteObjects(ctx, &s3.DeleteObjectsInput{ + Bucket: aws.String(bucket), + Delete: &types.Delete{ + Objects: deleteObjects, + Quiet: aws.Bool(true), + }, + }) + if err != nil { + return fmt.Errorf("failed to delete objects: %w", err) + } + + totalDeleted += len(batch) + } + } + + fmt.Printf("Deleted %d objects from s3://%s/%s\n", totalDeleted, bucket, prefix) + return nil +} + +func deleteLocal(destination string) error { + // Check if path exists + if _, err := os.Stat(destination); os.IsNotExist(err) { + fmt.Printf("Directory %s does not exist, nothing to delete\n", destination) + return nil + } + + // Remove directory recursively + if err := os.RemoveAll(destination); err != nil { + return fmt.Errorf("failed to remove directory: %w", err) + } + + fmt.Printf("Deleted directory %s\n", destination) + return nil +} diff --git a/config/crd/bases/raczylo.com_clusterimageexports.yaml b/config/crd/bases/raczylo.com_clusterimageexports.yaml index 
e51baf6..7aa66b9 100644 --- a/config/crd/bases/raczylo.com_clusterimageexports.yaml +++ b/config/crd/bases/raczylo.com_clusterimageexports.yaml @@ -118,6 +118,25 @@ spec: items: type: string type: array + retention: + description: |- + Retention specifies how many completed exports to keep per base path. + Oldest exports beyond this limit will be deleted (including their backed up images). + WARNING: Deletion removes both the CRD and the actual backed up images from storage. + properties: + maxFailed: + default: 1 + description: Maximum number of failed exports to keep + format: int32 + minimum: 0 + type: integer + maxSuccessful: + default: 3 + description: Maximum number of successful exports to keep + format: int32 + minimum: 0 + type: integer + type: object storage: description: ClusterImageStorageSpec defines the desired state of ClusterImageStorage @@ -160,6 +179,14 @@ spec: required: - target type: object + ttlDaysAfterFinished: + description: |- + TTLDaysAfterFinished specifies how many days to keep completed exports. + If set, the export (and its backed up images) will be deleted after this many days. + WARNING: Deletion removes both the CRD and the actual backed up images from storage. 
+ format: int32 + minimum: 1 + type: integer required: - basePath - maxConcurrentJobs @@ -169,6 +196,11 @@ spec: status: description: ClusterImageExportStatus defines the observed state of ClusterImageExport properties: + completedAt: + description: CompletedAt is the timestamp when the export completed + (SUCCESS or FAILED) + format: date-time + type: string completedImages: description: Number of images that have completed export type: integer diff --git a/docker-image-worker/cleanup.py b/docker-image-worker/cleanup.py deleted file mode 100755 index 5d08026..0000000 --- a/docker-image-worker/cleanup.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys -import argparse -from botocore.exceptions import ClientError -from tenacity import retry, stop_after_attempt, wait_fixed - -sys.path.append(os.path.dirname(os.path.abspath(__file__))) - -from s3_utils import get_s3_client, parse_s3_path, add_common_arguments, validate_args - -@retry(stop=stop_after_attempt(5), wait=wait_fixed(5)) -def remove_directory(destination, use_role=False, role_name=None, aws_access_key_id=None, aws_secret_access_key=None, endpoint_url=None, region=None): - """ - Remove a directory recursively, either local or in an S3 bucket - """ - if destination.startswith('s3://'): - # Removing from S3 - s3_client = get_s3_client(use_role, role_name, aws_access_key_id, aws_secret_access_key, endpoint_url, region) - bucket, prefix = parse_s3_path(destination) - try: - paginator = s3_client.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=bucket, Prefix=prefix): - if 'Contents' in page: - objects_to_delete = [{'Key': obj['Key']} for obj in page['Contents']] - s3_client.delete_objects(Bucket=bucket, Delete={'Objects': objects_to_delete}) - print(f"Directory {destination} removed successfully from S3") - except ClientError as e: - print(f"Error removing directory from S3: {str(e)}") - return False - else: - # Removing local directory - try: - import shutil 
- if os.path.exists(destination): - shutil.rmtree(destination) - print(f"Directory {destination} removed successfully") - else: - print(f"Directory {destination} does not exist") - except IOError as e: - print(f"Error removing directory: {str(e)}") - return False - return True - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Remove a directory recursively, either local or in an S3 bucket.") - parser.add_argument("destination", help="The directory path (local) or S3 path (e.g., 's3://bucket/prefix') to remove") - add_common_arguments(parser) - - args = parser.parse_args() - validate_args(args, parser) - - success = remove_directory( - args.destination, - args.use_role, - args.role_name, - args.aws_access_key_id, - args.aws_secret_access_key, - args.endpoint_url, - args.region - ) - - if success: - print("Cleanup completed successfully.") - else: - print("Cleanup failed.") - exit(1) \ No newline at end of file diff --git a/docker-image-worker/export.py b/docker-image-worker/export.py deleted file mode 100755 index e4e671c..0000000 --- a/docker-image-worker/export.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys -import argparse -import logging -from botocore.exceptions import ClientError, BotoCoreError -from tenacity import retry, stop_after_attempt, wait_fixed - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from s3_utils import get_s3_client, parse_s3_path, add_common_arguments, validate_args - -def log_error_details(e): - """Log detailed error information from AWS exceptions""" - if hasattr(e, 'response'): - error_code = e.response.get('Error', {}).get('Code', 'Unknown') - error_message = e.response.get('Error', {}).get('Message', str(e)) - request_id = e.response.get('ResponseMetadata', {}).get('RequestId', 'Unknown') 
- logger.error(f"AWS Error Details:") - logger.error(f"- Error Code: {error_code}") - logger.error(f"- Error Message: {error_message}") - logger.error(f"- Request ID: {request_id}") - logger.error(f"- Full Response: {e.response}") - else: - logger.error(f"Non-AWS Error: {str(e)}") - -@retry(stop=stop_after_attempt(5), wait=wait_fixed(5)) -def transfer_file(source, destination, use_role=False, role_name=None, use_current_role=False, aws_access_key_id=None, aws_secret_access_key=None, endpoint_url=None, region=None): - """ - Transfer a file from a local source to either a local destination or an S3 bucket - """ - if not os.path.isfile(source): - logger.error(f"Error: Source file '{source}' does not exist or is not a file.") - return False - - if destination.startswith('s3://'): - # Uploading to S3 - try: - logger.info(f"Attempting to upload {source} to {destination}") - s3_client = get_s3_client(use_role, role_name, use_current_role, aws_access_key_id, aws_secret_access_key, endpoint_url, region) - bucket, s3_key = parse_s3_path(destination) - - try: - s3_client.upload_file(source, bucket, s3_key) - logger.info(f"File {source} uploaded successfully to {destination}") - except ClientError as e: - log_error_details(e) - if "AccessDenied" in str(e): - logger.error("Access denied. Please check:") - logger.error("1. IAM role/user permissions") - logger.error("2. S3 bucket permissions") - logger.error("3. 
Web identity token configuration") - return False - except BotoCoreError as e: - logger.error(f"Boto3 error during upload: {str(e)}") - return False - - except Exception as e: - logger.error(f"Unexpected error during S3 client creation or upload: {str(e)}") - return False - else: - # Copying to local destination - try: - import shutil - logger.info(f"Attempting to copy {source} to local destination {destination}") - # Create destination directory if it doesn't exist - os.makedirs(os.path.dirname(destination), exist_ok=True) - shutil.copy2(source, destination) - logger.info(f"File {source} copied successfully to {destination}") - except IOError as e: - logger.error(f"Error copying file: {str(e)}") - return False - return True - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Transfer a file from a local source to either a local destination or an S3 bucket.") - parser.add_argument("source", help="The local source file path") - parser.add_argument("destination", help="The destination file path (local) or S3 path (e.g., 's3://bucket/key')") - add_common_arguments(parser) - - args = parser.parse_args() - validate_args(args, parser) - - success = transfer_file( - args.source, - args.destination, - args.use_role, - args.role_name, - args.use_current_role, - args.aws_access_key_id, - args.aws_secret_access_key, - args.endpoint_url, - args.region - ) - - if success: - logger.info("Transfer completed successfully.") - else: - logger.error("Transfer failed.") - exit(1) diff --git a/docker-image-worker/requirements.txt b/docker-image-worker/requirements.txt deleted file mode 100644 index 709f80d..0000000 --- a/docker-image-worker/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -boto3 -botocore -jmespath -tenacity \ No newline at end of file diff --git a/docker-image-worker/s3_utils.py b/docker-image-worker/s3_utils.py deleted file mode 100644 index 6891d86..0000000 --- a/docker-image-worker/s3_utils.py +++ /dev/null @@ -1,228 +0,0 @@ -import boto3 
-from botocore.exceptions import ClientError - -import os -import logging - -def get_s3_client(use_role=False, role_name=None, use_current_role=False, aws_access_key_id=None, aws_secret_access_key=None, endpoint_url=None, region=None): - """ - Create and return an S3 client based on the provided authentication method, endpoint, and region. - """ - logging.basicConfig(level=logging.INFO) - logger = logging.getLogger(__name__) - - client_kwargs = {} - - # Log authentication method being attempted - logger.info("Attempting S3 client creation with:") - logger.info(f"- Region: {region if region else 'default'}") - logger.info(f"- Endpoint URL: {endpoint_url if endpoint_url else 'default'}") - - if endpoint_url: - client_kwargs['endpoint_url'] = endpoint_url - if region: - client_kwargs['region_name'] = region - - # Check for AWS Web Identity token - token_file = os.environ.get('AWS_WEB_IDENTITY_TOKEN_FILE') - role_arn = os.environ.get('AWS_ROLE_ARN') - if token_file or role_arn: - logger.info("AWS Web Identity configuration detected:") - logger.info(f"- Token file path: {token_file}") - logger.info(f"- Role ARN: {role_arn}") - logger.info(f"- Session name: {os.environ.get('AWS_ROLE_SESSION_NAME', 'default')}") - - if aws_access_key_id and aws_secret_access_key: - logger.info("Using explicit AWS credentials") - # Use explicit credentials if provided - client_kwargs['aws_access_key_id'] = aws_access_key_id - client_kwargs['aws_secret_access_key'] = aws_secret_access_key - return boto3.client('s3', **client_kwargs) - elif use_role and role_name: - # Assume specific role if requested - logger.info(f"Attempting to assume role: {role_name}") - try: - sts_client = boto3.client('sts') - # Get current identity for logging - identity = sts_client.get_caller_identity() - logger.info(f"Current identity: {identity['Arn']}") - - assumed_role_object = sts_client.assume_role( - RoleArn=f"arn:aws:iam::{boto3.client('sts').get_caller_identity()['Account']}:role/{role_name}", - 
RoleSessionName="AssumeRoleSession" - ) - credentials = assumed_role_object['Credentials'] - client_kwargs['aws_access_key_id'] = credentials['AccessKeyId'] - client_kwargs['aws_secret_access_key'] = credentials['SecretAccessKey'] - client_kwargs['aws_session_token'] = credentials['SessionToken'] - return boto3.client('s3', **client_kwargs) - except Exception as e: - logger.error(f"Failed to assume role {role_name}: {str(e)}") - raise - elif use_current_role: - # Use the current role (e.g., from Kubernetes service account) - logger.info("Using current role from environment") - try: - # Log environment for debugging - for key, value in sorted(os.environ.items()): - if any(k in key.lower() for k in ['aws', 'role', 'auth', 'token', 'credential']): - logger.info(f"Environment: {key}={value}") - - # Get the AWS region from environment or parameter - aws_region = os.environ.get('AWS_REGION') or os.environ.get('AWS_DEFAULT_REGION') - if not aws_region and not region: - raise ValueError("AWS region must be specified either through region parameter or AWS_REGION environment variable") - - # Use region from parameter only if not set in environment - if not aws_region: - aws_region = region - # Set it in environment for other AWS clients - os.environ['AWS_REGION'] = region - - logger.info(f"Using AWS region: {aws_region}") - - # Create an STS client in the correct region - sts_kwargs = {'endpoint_url': f'https://sts.{aws_region}.amazonaws.com'} - if not os.environ.get('AWS_REGION') and not os.environ.get('AWS_DEFAULT_REGION'): - sts_kwargs['region_name'] = aws_region - sts = boto3.client('sts', **sts_kwargs) - - # Read the web identity token - token_file = os.environ.get('AWS_WEB_IDENTITY_TOKEN_FILE') - role_arn = os.environ.get('AWS_ROLE_ARN') - - if not token_file or not role_arn: - raise ValueError("AWS_WEB_IDENTITY_TOKEN_FILE and AWS_ROLE_ARN must be set") - - with open(token_file, 'r') as f: - token = f.read().strip() - - logger.info("Successfully read web identity 
token") - logger.info(f"Using role ARN: {role_arn}") - - # Assume role with web identity using regional endpoint - try: - response = sts.assume_role_with_web_identity( - RoleArn=role_arn, - RoleSessionName=os.environ.get('AWS_ROLE_SESSION_NAME', 'WebIdentitySession'), - WebIdentityToken=token - ) - - # Get the temporary credentials - credentials = response['Credentials'] - - # Create the S3 client with the temporary credentials - s3_kwargs = { - 'aws_access_key_id': credentials['AccessKeyId'], - 'aws_secret_access_key': credentials['SecretAccessKey'], - 'aws_session_token': credentials['SessionToken'] - } - # Only set region_name if not already in environment - if not os.environ.get('AWS_REGION') and not os.environ.get('AWS_DEFAULT_REGION'): - s3_kwargs['region_name'] = aws_region - # Add any additional kwargs - s3_kwargs.update(client_kwargs) - client = boto3.client('s3', **s3_kwargs) - - logger.info(f"Successfully assumed role with web identity: {response['AssumedRoleUser']['Arn']}") - - # Test the credentials - try: - # Try to get caller identity first - sts_test = boto3.client( - 'sts', - region_name=aws_region, - aws_access_key_id=credentials['AccessKeyId'], - aws_secret_access_key=credentials['SecretAccessKey'], - aws_session_token=credentials['SessionToken'] - ) - identity = sts_test.get_caller_identity() - logger.info(f"Successfully verified credentials as: {identity['Arn']}") - - # Then try S3 access - bucket_name = os.environ.get('BUCKET_NAME', 'default-bucket') - try: - client.head_bucket(Bucket=bucket_name) - logger.info(f"Successfully verified S3 access to bucket: {bucket_name}") - except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code == '404': - logger.warning(f"Bucket {bucket_name} does not exist, but credentials work") - else: - logger.warning(f"S3 access check failed: {error_code} - {e.response['Error']['Message']}") - except Exception as e: - logger.warning(f"Could not verify credentials: {str(e)}") - - return client 
- - except ClientError as e: - error_code = e.response['Error']['Code'] - error_message = e.response['Error']['Message'] - logger.error("Failed to assume role with web identity:") - logger.error(f"Error Code: {error_code}") - logger.error(f"Error Message: {error_message}") - logger.error("Trust policy might need to be updated to allow sts:AssumeRoleWithWebIdentity") - logger.error("Current role ARN: " + role_arn) - logger.error("Token file path: " + token_file) - raise - except Exception as e: - logger.error(f"Failed to use current role: {str(e)}") - logger.error("Current environment:") - for key, value in sorted(os.environ.items()): - if any(k in key.lower() for k in ['aws', 'role', 'auth', 'token', 'credential']): - logger.error(f" {key}: {value}") - raise - else: - # Use default credentials (environment, instance profile, or pod service account) - logger.info("Using default credential provider chain") - try: - client = boto3.client('s3', **client_kwargs) - # Try to get caller identity to verify credentials - sts = boto3.client('sts') - identity = sts.get_caller_identity() - logger.info(f"Successfully authenticated as: {identity['Arn']}") - return client - except Exception as e: - logger.error(f"Failed to create S3 client: {str(e)}") - raise - -def parse_s3_path(s3_path): - """ - Parse an S3 path into bucket and key - """ - parts = s3_path.replace('s3://', '').split('/', 1) - bucket = parts[0] - key = parts[1] if len(parts) > 1 else '' - return bucket, key - -def add_common_arguments(parser): - """ - Add common command-line arguments to an ArgumentParser object - """ - auth_group = parser.add_mutually_exclusive_group() - auth_group.add_argument("--use_role", action="store_true", help="Use IAM role for authentication") - auth_group.add_argument("--use_current_role", action="store_true", help="Use current AWS role (e.g. 
from Kubernetes service account)") - parser.add_argument("--role_name", help="The name of the IAM role to assume (only when --use_role is set)") - parser.add_argument("--aws_access_key_id", help="AWS access key ID") - parser.add_argument("--aws_secret_access_key", help="AWS secret access key") - parser.add_argument("--endpoint_url", help="S3-compatible endpoint URL") - parser.add_argument("--region", help="AWS region (ignored if endpoint_url is specified)") - -def validate_args(args, parser): - """ - Validate command-line arguments - """ - if args.destination.startswith('s3://'): - # Check for conflicting auth methods - if args.use_role and not args.role_name: - parser.error("--role_name is required when using --use_role") - - if args.role_name and not args.use_role: - parser.error("--role_name can only be used with --use_role") - - if args.use_current_role and (args.aws_access_key_id or args.aws_secret_access_key): - parser.error("When using current role (--use_current_role), access key and secret should not be specified") - - # If using explicit credentials, require both key and secret - if (args.aws_access_key_id or args.aws_secret_access_key) and not (args.aws_access_key_id and args.aws_secret_access_key): - parser.error("Both --aws_access_key_id and --aws_secret_access_key must be provided when using access key authentication") diff --git a/go.mod b/go.mod index 7ee688a..c459cd8 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,16 @@ module github.com/lukaszraczylo/kubernetes-images-sync-operator go 1.24.9 require ( + github.com/aws/aws-sdk-go-v2 v1.41.0 + github.com/aws/aws-sdk-go-v2/config v1.32.6 + github.com/aws/aws-sdk-go-v2/credentials v1.19.6 + github.com/aws/aws-sdk-go-v2/service/s3 v1.94.0 + github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 github.com/go-logr/logr v1.4.3 github.com/onsi/ginkgo/v2 v2.27.3 github.com/onsi/gomega v1.38.2 + github.com/spf13/cobra v1.10.2 + github.com/stretchr/testify v1.11.1 k8s.io/api v0.34.3 k8s.io/apimachinery v0.34.3 
k8s.io/client-go v0.34.3 @@ -17,6 +24,20 @@ require ( cel.dev/expr v0.25.1 // indirect github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.1 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.16 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.16 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.16 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 // indirect + github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.8 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12 // indirect + github.com/aws/smithy-go v1.24.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect @@ -62,7 +83,6 @@ require ( github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.67.4 // indirect github.com/prometheus/procfs v0.19.2 // indirect - github.com/spf13/cobra v1.10.2 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/stoewer/go-strcase v1.3.1 // indirect github.com/x448/float16 v0.8.4 // indirect @@ -96,6 +116,7 @@ require ( google.golang.org/protobuf v1.36.11 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.34.3 // indirect k8s.io/apiserver v0.34.3 // indirect k8s.io/component-base v0.34.3 // 
indirect diff --git a/go.sum b/go.sum index 4e569be..e73fcf1 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,44 @@ github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1 github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= +github.com/aws/aws-sdk-go-v2 v1.41.0 h1:tNvqh1s+v0vFYdA1xq0aOJH+Y5cRyZ5upu6roPgPKd4= +github.com/aws/aws-sdk-go-v2 v1.41.0/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4= +github.com/aws/aws-sdk-go-v2/config v1.32.6 h1:hFLBGUKjmLAekvi1evLi5hVvFQtSo3GYwi+Bx4lpJf8= +github.com/aws/aws-sdk-go-v2/config v1.32.6/go.mod h1:lcUL/gcd8WyjCrMnxez5OXkO3/rwcNmvfno62tnXNcI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.6 h1:F9vWao2TwjV2MyiyVS+duza0NIRtAslgLUM0vTA1ZaE= +github.com/aws/aws-sdk-go-v2/credentials v1.19.6/go.mod h1:SgHzKjEVsdQr6Opor0ihgWtkWdfRAIwxYzSJ8O85VHY= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16 h1:80+uETIWS1BqjnN9uJ0dBUaETh+P1XwFy5vwHwK5r9k= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16/go.mod h1:wOOsYuxYuB/7FlnVtzeBYRcjSRtQpAW0hCP7tIULMwo= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.16 h1:rgGwPzb82iBYSvHMHXc8h9mRoOUBZIGFgKb9qniaZZc= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.16/go.mod h1:L/UxsGeKpGoIj6DxfhOWHWQ/kGKcd4I1VncE4++IyKA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.16 h1:1jtGzuV7c82xnqOVfx2F0xmJcOw5374L7N6juGW6x6U= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.16/go.mod h1:M2E5OQf+XLe+SZGmmpaI2yy+J326aFf6/+54PoxSANc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 
h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 h1:CjMzUs78RDDv4ROu3JnJn/Ig1r6ZD7/T2DXLLRpejic= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16/go.mod h1:uVW4OLBqbJXSHJYA9svT9BluSvvwbzLQ2Crf6UPzR3c= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 h1:DIBqIrJ7hv+e4CmIk2z3pyKT+3B6qVMgRsawHiR3qso= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7/go.mod h1:vLm00xmBke75UmpNvOcZQ/Q30ZFjbczeLFqGx5urmGo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.16 h1:oHjJHeUy0ImIV0bsrX0X91GkV5nJAyv1l1CC9lnO0TI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.16/go.mod h1:iRSNGgOYmiYwSCXxXaKb9HfOEj40+oTKn8pTxMlYkRM= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 h1:NSbvS17MlI2lurYgXnCOLvCFX38sBW4eiVER7+kkgsU= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16/go.mod h1:SwT8Tmqd4sA6G1qaGdzWCJN99bUmPGHfRwwq3G5Qb+A= +github.com/aws/aws-sdk-go-v2/service/s3 v1.94.0 h1:SWTxh/EcUCDVqi/0s26V6pVUq0BBG7kx0tDTmF/hCgA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.94.0/go.mod h1:79S2BdqCJpScXZA2y+cpZuocWsjGjJINyXnOsf5DTz8= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 h1:HpI7aMmJ+mm1wkSHIA2t5EaFFv5EFYXePW30p1EIrbQ= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.4/go.mod h1:C5RdGMYGlfM0gYq/tifqgn4EbyX99V15P2V3R+VHbQU= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.8 h1:aM/Q24rIlS3bRAhTyFurowU8A0SMyGDtEOY/l/s/1Uw= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.8/go.mod h1:+fWt2UHSb4kS7Pu8y+BMBvJF0EWx+4H0hzNwtDNRTrg= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12 
h1:AHDr0DaHIAo8c9t1emrzAlVDFp+iMMKnPdYy6XO4MCE= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12/go.mod h1:GQ73XawFFiWxyWXMHWfhiomvP3tXtdNar/fi8z18sx0= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 h1:SciGFVNZ4mHdm7gpD1dgZYnCuVdX1s+lFTg4+4DOy70= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.5/go.mod h1:iW40X4QBmUxdP+fZNOpfmkdMZqsovezbAeO+Ubiv2pk= +github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= +github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= diff --git a/internal/controller/raczylo.com/clusterimage_controller.go b/internal/controller/raczylo.com/clusterimage_controller.go index b3979f4..ae287a4 100644 --- a/internal/controller/raczylo.com/clusterimage_controller.go +++ b/internal/controller/raczylo.com/clusterimage_controller.go @@ -360,12 +360,12 @@ func (r *ClusterImageReconciler) createBackupJob(ctx context.Context, clusterIma if clusterImage.Spec.Storage == shared.STORAGE_S3 { s3Params := shared.SetupS3Params(clusterImageExport.Spec.Storage.S3) additionalCommands := []string{ - "./export.py " + strings.Join(s3Params, " ") + " '/tmp/" + normalisedImageName + ".tar' " + "'s3://" + clusterImageExport.Spec.Storage.S3.Bucket + clusterImage.Spec.ExportPath + "/" + clusterImage.Spec.ExportName + "/" + normalisedImageName + ".tar'", + "./worker export " + strings.Join(s3Params, " ") + " '/tmp/" + normalisedImageName + ".tar' " + "'s3://" + clusterImageExport.Spec.Storage.S3.Bucket + clusterImage.Spec.ExportPath + "/" + clusterImage.Spec.ExportName + "/" + normalisedImageName + ".tar'", } defaultCommands = append(defaultCommands, additionalCommands...) 
} else if clusterImage.Spec.Storage == shared.STORAGE_FILE { additionalCommands := []string{ - "./export.py /tmp/" + normalisedImageName + ".tar" + " " + clusterImage.Spec.ExportPath + "/" + clusterImage.Spec.ExportName + "/" + normalisedImageName + ".tar", + "./worker export '/tmp/" + normalisedImageName + ".tar' '" + clusterImage.Spec.ExportPath + "/" + clusterImage.Spec.ExportName + "/" + normalisedImageName + ".tar'", } defaultCommands = append(defaultCommands, additionalCommands...) } diff --git a/internal/controller/raczylo.com/clusterimageexport_controller.go b/internal/controller/raczylo.com/clusterimageexport_controller.go index 5bd7af8..c4c3330 100644 --- a/internal/controller/raczylo.com/clusterimageexport_controller.go +++ b/internal/controller/raczylo.com/clusterimageexport_controller.go @@ -5,6 +5,7 @@ import ( "crypto/md5" // #nosec G501 - MD5 used for non-cryptographic unique identifiers only "fmt" "strings" + "time" "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" @@ -61,6 +62,19 @@ func (r *ClusterImageExportReconciler) Reconcile(ctx context.Context, req ctrl.R return r.handleDeletion(ctx, clusterImageExport) } + // Check if this export should be deleted by TTL + if r.shouldDeleteByTTL(clusterImageExport) { + l.Info("Deleting export due to TTL expiration", + "export", clusterImageExport.Name, + "ttlDays", *clusterImageExport.Spec.TTLDaysAfterFinished, + "completedAt", clusterImageExport.Status.CompletedAt) + if err := r.Delete(ctx, clusterImageExport); err != nil && !errors.IsNotFound(err) { + l.Error(err, "Failed to delete export by TTL") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + // Add finalizer and creation timestamp annotation if they don't exist needsUpdate := false if !controllerutil.ContainsFinalizer(clusterImageExport, clusterImageExportFinalizer) { @@ -214,6 +228,11 @@ func (r *ClusterImageExportReconciler) Reconcile(ctx context.Context, req ctrl.R } else { export.Status.Progress = shared.STATUS_SUCCESS } + // 
Set CompletedAt timestamp when export completes + if export.Status.CompletedAt == nil { + now := metav1.Now() + export.Status.CompletedAt = &now + } } return nil }); err != nil { @@ -221,6 +240,15 @@ func (r *ClusterImageExportReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, err } + // If export is complete, run retention cleanup + if clusterImageExport.Status.Progress == shared.STATUS_SUCCESS || + clusterImageExport.Status.Progress == shared.STATUS_FAILED { + if err := r.cleanupByRetention(ctx, clusterImageExport); err != nil { + l.Error(err, "Failed to cleanup by retention policy") + // Don't return error - this is non-critical + } + } + // If there are still pending images, requeue if pendingCount > 0 { return ctrl.Result{Requeue: true}, nil @@ -384,12 +412,12 @@ func (r *ClusterImageExportReconciler) runCleanupJob(ctx context.Context, cluste if clusterImageExport.Spec.Storage.StorageTarget == shared.STORAGE_S3 { s3Params := shared.SetupS3Params(clusterImageExport.Spec.Storage.S3) additionalCommands := []string{ - "./cleanup.py " + strings.Join(s3Params, " ") + " 's3://" + clusterImageExport.Spec.Storage.S3.Bucket + clusterImageExport.Spec.BasePath + "/" + clusterImageExport.ObjectMeta.Name + "/'", + "./worker cleanup " + strings.Join(s3Params, " ") + " 's3://" + clusterImageExport.Spec.Storage.S3.Bucket + clusterImageExport.Spec.BasePath + "/" + clusterImageExport.ObjectMeta.Name + "/'", } defaultCommands = append(defaultCommands, additionalCommands...) } else if clusterImageExport.Spec.Storage.StorageTarget == shared.STORAGE_FILE { additionalCommands := []string{ - "./cleanup.py" + "'" + clusterImageExport.Spec.BasePath + "/" + clusterImageExport.ObjectMeta.Name + "/'", + "./worker cleanup '" + clusterImageExport.Spec.BasePath + "/" + clusterImageExport.ObjectMeta.Name + "/'", } defaultCommands = append(defaultCommands, additionalCommands...) 
} @@ -452,3 +480,110 @@ func (r *ClusterImageExportReconciler) runCleanupJob(ctx context.Context, cluste l.Info("Created cleanup job with retry limit and TTL") return nil } + +// shouldDeleteByTTL checks if the export should be deleted based on TTL (in days) +func (r *ClusterImageExportReconciler) shouldDeleteByTTL(clusterImageExport *raczylocomv1.ClusterImageExport) bool { + // Only apply TTL to completed exports + if clusterImageExport.Status.Progress != shared.STATUS_SUCCESS && + clusterImageExport.Status.Progress != shared.STATUS_FAILED { + return false + } + + // Check if TTL is configured + if clusterImageExport.Spec.TTLDaysAfterFinished == nil { + return false + } + + // Check if CompletedAt is set + if clusterImageExport.Status.CompletedAt == nil { + return false + } + + // Convert days to duration (24 hours per day) + ttlDuration := time.Duration(*clusterImageExport.Spec.TTLDaysAfterFinished) * 24 * time.Hour + expirationTime := clusterImageExport.Status.CompletedAt.Add(ttlDuration) + + return time.Now().After(expirationTime) +} + +// cleanupByRetention enforces the retention policy for completed exports +func (r *ClusterImageExportReconciler) cleanupByRetention(ctx context.Context, clusterImageExport *raczylocomv1.ClusterImageExport) error { + l := log.FromContext(ctx) + + // Check if retention policy is configured + if clusterImageExport.Spec.Retention == nil { + return nil + } + + // List all ClusterImageExports in the same namespace + exportList := &raczylocomv1.ClusterImageExportList{} + if err := r.List(ctx, exportList, client.InNamespace(clusterImageExport.Namespace)); err != nil { + return fmt.Errorf("failed to list ClusterImageExports: %w", err) + } + + // Separate successful and failed exports, sorted by completion time + var successfulExports, failedExports []*raczylocomv1.ClusterImageExport + for i := range exportList.Items { + export := &exportList.Items[i] + // Skip exports that don't have the same base path (different backup sets) + if 
export.Spec.BasePath != clusterImageExport.Spec.BasePath { + continue + } + // Skip exports that are still running + if export.Status.Progress != shared.STATUS_SUCCESS && + export.Status.Progress != shared.STATUS_FAILED { + continue + } + if export.Status.Progress == shared.STATUS_SUCCESS { + successfulExports = append(successfulExports, export) + } else if export.Status.Progress == shared.STATUS_FAILED { + failedExports = append(failedExports, export) + } + } + + // Sort by CompletedAt (newest first) + sortByCompletionTime := func(exports []*raczylocomv1.ClusterImageExport) { + for i := 0; i < len(exports); i++ { + for j := i + 1; j < len(exports); j++ { + iTime := exports[i].Status.CompletedAt + jTime := exports[j].Status.CompletedAt + if iTime == nil || (jTime != nil && jTime.After(iTime.Time)) { + exports[i], exports[j] = exports[j], exports[i] + } + } + } + } + + sortByCompletionTime(successfulExports) + sortByCompletionTime(failedExports) + + // Delete excess successful exports + if clusterImageExport.Spec.Retention.MaxSuccessful != nil { + maxSuccessful := int(*clusterImageExport.Spec.Retention.MaxSuccessful) + if len(successfulExports) > maxSuccessful { + for _, export := range successfulExports[maxSuccessful:] { + l.Info("Deleting export due to retention policy (maxSuccessful exceeded)", + "export", export.Name, "maxSuccessful", maxSuccessful) + if err := r.Delete(ctx, export); err != nil && !errors.IsNotFound(err) { + l.Error(err, "Failed to delete export for retention", "export", export.Name) + } + } + } + } + + // Delete excess failed exports + if clusterImageExport.Spec.Retention.MaxFailed != nil { + maxFailed := int(*clusterImageExport.Spec.Retention.MaxFailed) + if len(failedExports) > maxFailed { + for _, export := range failedExports[maxFailed:] { + l.Info("Deleting export due to retention policy (maxFailed exceeded)", + "export", export.Name, "maxFailed", maxFailed) + if err := r.Delete(ctx, export); err != nil && !errors.IsNotFound(err) { + 
l.Error(err, "Failed to delete export for retention", "export", export.Name) + } + } + } + } + + return nil +} diff --git a/internal/controller/raczylo.com/controller_unit_test.go b/internal/controller/raczylo.com/controller_unit_test.go new file mode 100644 index 0000000..2897b35 --- /dev/null +++ b/internal/controller/raczylo.com/controller_unit_test.go @@ -0,0 +1,1031 @@ +package raczylocom + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + raczylocomv1 "github.com/lukaszraczylo/kubernetes-images-sync-operator/api/raczylo.com/v1" + "github.com/lukaszraczylo/kubernetes-images-sync-operator/internal/shared" +) + +type TestScenario string + +const ( + ScenarioGood TestScenario = "good" + ScenarioNotGood TestScenario = "not_good" + ScenarioReallyBad TestScenario = "really_bad" +) + +type ControllerTestSuite struct { + suite.Suite + scheme *runtime.Scheme + ctx context.Context +} + +func TestControllerTestSuite(t *testing.T) { + suite.Run(t, new(ControllerTestSuite)) +} + +func (s *ControllerTestSuite) SetupSuite() { + s.scheme = runtime.NewScheme() + require.NoError(s.T(), clientgoscheme.AddToScheme(s.scheme)) + require.NoError(s.T(), raczylocomv1.AddToScheme(s.scheme)) + s.ctx = context.Background() +} + +func (s *ControllerTestSuite) newFakeClient(objs ...client.Object) client.Client { + return fake.NewClientBuilder(). + WithScheme(s.scheme). + WithObjects(objs...). + WithStatusSubresource(&raczylocomv1.ClusterImage{}, &raczylocomv1.ClusterImageExport{}). 
+ WithIndex(&raczylocomv1.ClusterImage{}, "spec.exportName", func(obj client.Object) []string { + ci := obj.(*raczylocomv1.ClusterImage) + return []string{ci.Spec.ExportName} + }). + Build() +} + +// Helper to create a test ClusterImageExport +func (s *ControllerTestSuite) createClusterImageExport(name, namespace string, opts ...func(*raczylocomv1.ClusterImageExport)) *raczylocomv1.ClusterImageExport { + export := &raczylocomv1.ClusterImageExport{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "raczylo.com/v1", + Kind: "ClusterImageExport", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + UID: "test-uid-export", + }, + Spec: raczylocomv1.ClusterImageExportSpec{ + Name: name, + BasePath: "/backup", + Storage: raczylocomv1.ClusterImageStorageSpec{ + StorageTarget: shared.STORAGE_S3, + S3: raczylocomv1.ClusterImageStorageS3{ + Bucket: "test-bucket", + Region: "us-east-1", + }, + }, + MaxConcurrentJobs: 5, + }, + } + for _, opt := range opts { + opt(export) + } + return export +} + +// Helper to create a test ClusterImage +func (s *ControllerTestSuite) createClusterImage(name, namespace, exportName string, opts ...func(*raczylocomv1.ClusterImage)) *raczylocomv1.ClusterImage { + image := &raczylocomv1.ClusterImage{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "raczylo.com/v1", + Kind: "ClusterImage", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + UID: "test-uid-image", + }, + Spec: raczylocomv1.ClusterImageSpec{ + Image: "nginx", + Tag: "latest", + FullName: "nginx:latest", + Storage: shared.STORAGE_S3, + ExportName: exportName, + ExportPath: "/backup", + }, + } + for _, opt := range opts { + opt(image) + } + return image +} + +// ==================== ClusterImage Controller Tests ==================== + +func (s *ControllerTestSuite) TestClusterImageReconcile_NotFound() { + // Scenario: Good - resource doesn't exist, nothing to do + client := s.newFakeClient() + reconciler := &ClusterImageReconciler{ + Client: 
client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "non-existent", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_InitialStatus_Pending() { + // Scenario: Good - new ClusterImage should be set to PENDING + export := s.createClusterImageExport("test-export", "default") + image := s.createClusterImage("test-image", "default", "test-export") + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + require.NoError(s.T(), err) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior + + // Verify status was updated to PENDING + updatedImage := &raczylocomv1.ClusterImage{} + err = client.Get(s.ctx, types.NamespacedName{Name: "test-image", Namespace: "default"}, updatedImage) + require.NoError(s.T(), err) + assert.Equal(s.T(), shared.STATUS_PENDING, updatedImage.Status.Progress) +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_MissingExport() { + // Scenario: Not Good - ClusterImageExport doesn't exist + image := s.createClusterImage("test-image", "default", "non-existent-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_PENDING + }) + + client := s.newFakeClient(image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.Error(s.T(), err) + assert.Equal(s.T(), 
ctrl.Result{}, result) +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_MaxParallelJobsReached() { + // Scenario: Good - should requeue when max jobs reached + export := s.createClusterImageExport("test-export", "default") + image := s.createClusterImage("test-image", "default", "test-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_PENDING + }) + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + ActiveJobs: 5, // Already at max + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + require.NoError(s.T(), err) + assert.Equal(s.T(), time.Second*30, result.RequeueAfter) +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_SuccessStatus() { + // Scenario: Good - success status should not trigger further action + export := s.createClusterImageExport("test-export", "default") + image := s.createClusterImage("test-image", "default", "test-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_SUCCESS + }) + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_FailedStatus() { + // Scenario: Good - failed status should not trigger further action + export := s.createClusterImageExport("test-export", "default") + image := s.createClusterImage("test-image", "default", "test-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_FAILED + }) + + client := 
s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_PresentStatus() { + // Scenario: Good - present status should not trigger further action + export := s.createClusterImageExport("test-export", "default") + image := s.createClusterImage("test-image", "default", "test-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_PRESENT + }) + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) +} + +// ==================== ClusterImageExport Controller Tests ==================== + +func (s *ControllerTestSuite) TestClusterImageExportReconcile_NotFound() { + // Scenario: Good - resource doesn't exist, nothing to do + client := s.newFakeClient() + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "non-existent", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) +} + +func (s *ControllerTestSuite) TestClusterImageExportReconcile_AddFinalizer() { + // Scenario: Good - should add finalizer to new export + export := s.createClusterImageExport("test-export", "default") + + client := s.newFakeClient(export) + reconciler := 
&ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + require.NoError(s.T(), err) + + // Verify finalizer was added + updatedExport := &raczylocomv1.ClusterImageExport{} + err = client.Get(s.ctx, types.NamespacedName{Name: "test-export", Namespace: "default"}, updatedExport) + require.NoError(s.T(), err) + assert.Contains(s.T(), updatedExport.Finalizers, clusterImageExportFinalizer) +} + +func (s *ControllerTestSuite) TestClusterImageExportReconcile_AddCreationTimestamp() { + // Scenario: Good - should add creation timestamp annotation + export := s.createClusterImageExport("test-export", "default") + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + require.NoError(s.T(), err) + + // Verify annotation was added + updatedExport := &raczylocomv1.ClusterImageExport{} + err = client.Get(s.ctx, types.NamespacedName{Name: "test-export", Namespace: "default"}, updatedExport) + require.NoError(s.T(), err) + assert.NotNil(s.T(), updatedExport.Annotations) + _, exists := updatedExport.Annotations["export.raczylo.com/creation-timestamp"] + assert.True(s.T(), exists) +} + +func (s *ControllerTestSuite) TestClusterImageExportReconcile_InjectPodAnnotations() { + // Scenario: Good - pod annotations should be injectable + reconciler := &ClusterImageExportReconciler{} + + annotations := map[string]string{ + "prometheus.io/scrape": "true", + "prometheus.io/port": "8080", + } + reconciler.InjectPodAnnotations(annotations) + + assert.Equal(s.T(), annotations, reconciler.podAnnotations) +} + +// ==================== Matrix Test Scenarios 
==================== + +type ClusterImageScenario struct { + Name string + Scenario TestScenario + SetupFunc func(*ControllerTestSuite) (client.Client, *ClusterImageReconciler, reconcile.Request) + ExpectedError bool + ExpectedStatus string + Description string +} + +func (s *ControllerTestSuite) TestClusterImageReconcile_MatrixScenarios() { + scenarios := []ClusterImageScenario{ + { + Name: "new_image_initialization", + Scenario: ScenarioGood, + Description: "New ClusterImage should be initialized to PENDING", + SetupFunc: func(s *ControllerTestSuite) (client.Client, *ClusterImageReconciler, reconcile.Request) { + export := s.createClusterImageExport("export-1", "default") + image := s.createClusterImage("image-1", "default", "export-1") + c := s.newFakeClient(export, image) + r := &ClusterImageReconciler{Client: c, Scheme: s.scheme, MaxParallelJobs: 5} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "image-1", Namespace: "default"}} + return c, r, req + }, + ExpectedError: false, + ExpectedStatus: shared.STATUS_PENDING, + }, + { + Name: "missing_export_reference", + Scenario: ScenarioNotGood, + Description: "ClusterImage with missing export should error", + SetupFunc: func(s *ControllerTestSuite) (client.Client, *ClusterImageReconciler, reconcile.Request) { + image := s.createClusterImage("orphan-image", "default", "missing-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_PENDING + }) + c := s.newFakeClient(image) + r := &ClusterImageReconciler{Client: c, Scheme: s.scheme, MaxParallelJobs: 5} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "orphan-image", Namespace: "default"}} + return c, r, req + }, + ExpectedError: true, + }, + { + Name: "success_status_no_action", + Scenario: ScenarioGood, + Description: "ClusterImage with SUCCESS status should not trigger action", + SetupFunc: func(s *ControllerTestSuite) (client.Client, *ClusterImageReconciler, reconcile.Request) { + export := 
s.createClusterImageExport("export-2", "default") + image := s.createClusterImage("success-image", "default", "export-2", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_SUCCESS + }) + c := s.newFakeClient(export, image) + r := &ClusterImageReconciler{Client: c, Scheme: s.scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "success-image", Namespace: "default"}} + return c, r, req + }, + ExpectedError: false, + ExpectedStatus: shared.STATUS_SUCCESS, + }, + { + Name: "failed_status_no_action", + Scenario: ScenarioNotGood, + Description: "ClusterImage with FAILED status should not trigger action", + SetupFunc: func(s *ControllerTestSuite) (client.Client, *ClusterImageReconciler, reconcile.Request) { + export := s.createClusterImageExport("export-3", "default") + image := s.createClusterImage("failed-image", "default", "export-3", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_FAILED + }) + c := s.newFakeClient(export, image) + r := &ClusterImageReconciler{Client: c, Scheme: s.scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "failed-image", Namespace: "default"}} + return c, r, req + }, + ExpectedError: false, + ExpectedStatus: shared.STATUS_FAILED, + }, + { + Name: "empty_namespace", + Scenario: ScenarioReallyBad, + Description: "ClusterImage in empty namespace should be handled gracefully", + SetupFunc: func(s *ControllerTestSuite) (client.Client, *ClusterImageReconciler, reconcile.Request) { + c := s.newFakeClient() + r := &ClusterImageReconciler{Client: c, Scheme: s.scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "bad-image", Namespace: ""}} + return c, r, req + }, + ExpectedError: false, // Not found, gracefully handled + }, + } + + for _, tc := range scenarios { + s.Run(tc.Name, func() { + client, reconciler, req := tc.SetupFunc(s) + result, err := reconciler.Reconcile(s.ctx, req) + + if tc.ExpectedError { + assert.Error(s.T(), 
err, "Expected error for scenario: %s", tc.Name) + } else { + assert.NoError(s.T(), err, "Unexpected error for scenario: %s", tc.Name) + } + + if tc.ExpectedStatus != "" && !tc.ExpectedError { + image := &raczylocomv1.ClusterImage{} + getErr := client.Get(s.ctx, req.NamespacedName, image) + if getErr == nil { + assert.Equal(s.T(), tc.ExpectedStatus, image.Status.Progress) + } + } + + _ = result // Result validation varies by scenario + }) + } +} + +// ==================== Kubernetes Volatility Tests ==================== + +func (s *ControllerTestSuite) TestClusterImage_ConcurrentUpdates() { + // Scenario: Good - simulate concurrent reconciliation on PENDING status + // This test verifies that multiple reconciliations don't corrupt state + export := s.createClusterImageExport("concurrent-export", "default") + image := s.createClusterImage("concurrent-image", "default", "concurrent-export") + + fakeClient := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: fakeClient, + Scheme: s.scheme, + MaxParallelJobs: 10, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "concurrent-image", + Namespace: "default", + }, + } + + // First reconciliation should set status to PENDING + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior + + // Verify status was set + finalImage := &raczylocomv1.ClusterImage{} + err = fakeClient.Get(s.ctx, req.NamespacedName, finalImage) + require.NoError(s.T(), err) + assert.Equal(s.T(), shared.STATUS_PENDING, finalImage.Status.Progress) +} + +func (s *ControllerTestSuite) TestClusterImage_ActiveJobsMutex() { + // Scenario: Good - verify mutex protects ActiveJobs counter + reconciler := &ClusterImageReconciler{ + MaxParallelJobs: 10, + ActiveJobs: 0, + } + + done := make(chan bool) + iterations := 100 + + // Concurrent increments + go func() { + for i := 0; i < 
iterations; i++ { + reconciler.activeJobsMu.Lock() + reconciler.ActiveJobs++ + reconciler.activeJobsMu.Unlock() + } + done <- true + }() + + // Concurrent decrements + go func() { + for i := 0; i < iterations; i++ { + reconciler.activeJobsMu.Lock() + reconciler.ActiveJobs-- + reconciler.activeJobsMu.Unlock() + } + done <- true + }() + + <-done + <-done + + // Final count should be 0 + reconciler.activeJobsMu.Lock() + finalCount := reconciler.ActiveJobs + reconciler.activeJobsMu.Unlock() + assert.Equal(s.T(), 0, finalCount) +} + +func (s *ControllerTestSuite) TestClusterImageExport_WithDeletionTimestamp() { + // Scenario: Good - export being deleted should trigger cleanup + export := s.createClusterImageExport("deleting-export", "default") + now := metav1.Now() + export.DeletionTimestamp = &now + export.Finalizers = []string{clusterImageExportFinalizer} + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "deleting-export", + Namespace: "default", + }, + } + + // This should trigger deletion handling + result, err := reconciler.Reconcile(s.ctx, req) + // May error due to cleanup job creation, but should not panic + _ = err + _ = result +} + +// ==================== Image Parsing Scenarios (Controller Integration) ==================== + +func (s *ControllerTestSuite) TestClusterImage_SHAPinnedImages() { + // Scenario: Good - SHA-pinned images should be handled correctly + export := s.createClusterImageExport("sha-export", "default") + image := s.createClusterImage("sha-image", "default", "sha-export", func(i *raczylocomv1.ClusterImage) { + i.Spec.Image = "quay.io/cilium/cilium" + i.Spec.Tag = "v1.18.4" + i.Spec.Sha = "sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f" + i.Spec.FullName = "quay.io/cilium/cilium:v1.18.4@sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f" + }) + 
+ client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "sha-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior +} + +func (s *ControllerTestSuite) TestClusterImage_MultipleRegistries() { + // Scenario: Good - images from different registries + registries := []struct { + name string + image string + fullName string + }{ + {"gcr-image", "gcr.io/distroless/static", "gcr.io/distroless/static:nonroot"}, + {"ecr-image", "123456789.dkr.ecr.us-east-1.amazonaws.com/myapp", "123456789.dkr.ecr.us-east-1.amazonaws.com/myapp:v1.0"}, + {"dockerhub-image", "library/nginx", "nginx:latest"}, + {"quay-image", "quay.io/coreos/etcd", "quay.io/coreos/etcd:v3.5.0"}, + } + + export := s.createClusterImageExport("multi-registry-export", "default") + objs := []client.Object{export} + + for _, reg := range registries { + img := s.createClusterImage(reg.name, "default", "multi-registry-export", func(i *raczylocomv1.ClusterImage) { + i.Spec.Image = reg.image + i.Spec.FullName = reg.fullName + }) + objs = append(objs, img) + } + + client := s.newFakeClient(objs...) 
+ reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 10, + } + + for _, reg := range registries { + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: reg.name, + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err, "Failed for registry: %s", reg.name) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior + } +} + +// ==================== Storage Configuration Tests ==================== + +func (s *ControllerTestSuite) TestClusterImageExport_S3Storage() { + // Scenario: Good - S3 storage configuration + export := s.createClusterImageExport("s3-export", "default", func(e *raczylocomv1.ClusterImageExport) { + e.Spec.Storage = raczylocomv1.ClusterImageStorageSpec{ + StorageTarget: shared.STORAGE_S3, + S3: raczylocomv1.ClusterImageStorageS3{ + Bucket: "my-backup-bucket", + Region: "eu-west-1", + UseRole: true, + RoleARN: "arn:aws:iam::123456789:role/BackupRole", + Endpoint: "https://s3.eu-west-1.amazonaws.com", + }, + } + }) + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "s3-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + + // Verify export was updated + updatedExport := &raczylocomv1.ClusterImageExport{} + err = client.Get(s.ctx, req.NamespacedName, updatedExport) + require.NoError(s.T(), err) + assert.Equal(s.T(), shared.STORAGE_S3, updatedExport.Spec.Storage.StorageTarget) +} + +func (s *ControllerTestSuite) TestClusterImageExport_FileStorage() { + // Scenario: Good - File storage configuration + export := s.createClusterImageExport("file-export", "default", func(e *raczylocomv1.ClusterImageExport) { + e.Spec.Storage = raczylocomv1.ClusterImageStorageSpec{ + StorageTarget: 
shared.STORAGE_FILE, + } + e.Spec.BasePath = "/mnt/backup" + }) + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "file-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) +} + +// ==================== Edge Cases ==================== + +func (s *ControllerTestSuite) TestClusterImage_EmptySpec() { + // Scenario: Really Bad - empty spec should be handled + export := s.createClusterImageExport("empty-spec-export", "default") + image := &raczylocomv1.ClusterImage{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "raczylo.com/v1", + Kind: "ClusterImage", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "empty-spec-image", + Namespace: "default", + UID: "test-uid", + }, + Spec: raczylocomv1.ClusterImageSpec{ + ExportName: "empty-spec-export", + }, + } + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "empty-spec-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + // Should not error, just set to pending + assert.NoError(s.T(), err) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior +} + +func (s *ControllerTestSuite) TestClusterImageExport_EmptyNamespaces() { + // Scenario: Good - export with no namespace filters should process all + export := s.createClusterImageExport("all-ns-export", "default", func(e *raczylocomv1.ClusterImageExport) { + e.Spec.Namespaces = []string{} + e.Spec.ExcludedNamespaces = []string{} + }) + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: 
types.NamespacedName{ + Name: "all-ns-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) +} + +func (s *ControllerTestSuite) TestClusterImageExport_WithAdditionalImages() { + // Scenario: Good - export with additional images specified + export := s.createClusterImageExport("additional-images-export", "default", func(e *raczylocomv1.ClusterImageExport) { + e.Spec.AdditionalImages = []string{ + "nginx:1.21", + "redis:7.0", + "postgres:15@sha256:abc123", + } + }) + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "additional-images-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) +} + +func (s *ControllerTestSuite) TestClusterImageExport_WithIncludesExcludes() { + // Scenario: Good - export with include/exclude filters + export := s.createClusterImageExport("filtered-export", "default", func(e *raczylocomv1.ClusterImageExport) { + e.Spec.Includes = []string{"nginx", "redis"} + e.Spec.Excludes = []string{"test", "dev"} + e.Spec.Namespaces = []string{"production", "staging"} + e.Spec.ExcludedNamespaces = []string{"kube-system"} + }) + + client := s.newFakeClient(export) + reconciler := &ClusterImageExportReconciler{ + Client: client, + Scheme: s.scheme, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "filtered-export", + Namespace: "default", + }, + } + + _, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) +} + +func (s *ControllerTestSuite) TestClusterImage_WithImagePullSecrets() { + // Scenario: Good - image with pull secrets should work + export := s.createClusterImageExport("secret-export", "default") + image := s.createClusterImage("secret-image", "default", "secret-export", func(i *raczylocomv1.ClusterImage) { + 
i.Spec.ImagePullSecrets = []corev1.LocalObjectReference{ + {Name: "docker-registry-secret"}, + {Name: "gcr-json-key"}, + } + }) + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "secret-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior +} + +func (s *ControllerTestSuite) TestClusterImage_WithJobAnnotations() { + // Scenario: Good - image with job annotations + export := s.createClusterImageExport("annotated-export", "default", func(e *raczylocomv1.ClusterImageExport) { + e.Spec.JobAnnotations = map[string]string{ + "iam.amazonaws.com/role": "arn:aws:iam::123456789:role/BackupRole", + } + }) + image := s.createClusterImage("annotated-image", "default", "annotated-export", func(i *raczylocomv1.ClusterImage) { + i.Spec.JobAnnotations = map[string]string{ + "custom/annotation": "value", + } + }) + + client := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: client, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "annotated-image", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.True(s.T(), result.Requeue) //lint:ignore SA1019 testing controller's actual behavior +} + +// ==================== Job Status Tests ==================== + +func (s *ControllerTestSuite) TestClusterImage_SuccessfulJobCompletion() { + // Scenario: Good - verify job success status detection logic + // Note: Full integration testing requires envtest for KubeClient + // This test validates the basic state machine transitions + + export := 
s.createClusterImageExport("job-success-export", "default") + image := s.createClusterImage("job-success-image", "default", "job-success-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_SUCCESS // Already completed + }) + + fakeClient := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: fakeClient, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "job-success-image", + Namespace: "default", + }, + } + + // SUCCESS status should result in no-op + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) + + // Status should remain unchanged + finalImage := &raczylocomv1.ClusterImage{} + err = fakeClient.Get(s.ctx, req.NamespacedName, finalImage) + require.NoError(s.T(), err) + assert.Equal(s.T(), shared.STATUS_SUCCESS, finalImage.Status.Progress) +} + +func (s *ControllerTestSuite) TestClusterImage_RetryCount() { + // Scenario: Good - verify retry count is tracked properly + export := s.createClusterImageExport("retry-export", "default") + image := s.createClusterImage("retry-image", "default", "retry-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_FAILED // Terminal state + i.Status.RetryCount = 2 + }) + + fakeClient := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: fakeClient, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "retry-image", + Namespace: "default", + }, + } + + // FAILED status should result in no-op + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) + + // Verify retry count is preserved + finalImage := &raczylocomv1.ClusterImage{} + err = fakeClient.Get(s.ctx, req.NamespacedName, finalImage) + require.NoError(s.T(), err) + 
assert.Equal(s.T(), 2, finalImage.Status.RetryCount) +} + +func (s *ControllerTestSuite) TestClusterImage_MaxRetriesReached() { + // Scenario: Not Good - max retries reached and FAILED status + export := s.createClusterImageExport("max-retry-export", "default") + image := s.createClusterImage("max-retry-image", "default", "max-retry-export", func(i *raczylocomv1.ClusterImage) { + i.Status.Progress = shared.STATUS_FAILED + i.Status.RetryCount = 3 // Max retries reached + }) + + fakeClient := s.newFakeClient(export, image) + reconciler := &ClusterImageReconciler{ + Client: fakeClient, + Scheme: s.scheme, + MaxParallelJobs: 5, + } + + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "max-retry-image", + Namespace: "default", + }, + } + + // FAILED status with max retries should be terminal + result, err := reconciler.Reconcile(s.ctx, req) + assert.NoError(s.T(), err) + assert.Equal(s.T(), ctrl.Result{}, result) + + // Verify final state + finalImage := &raczylocomv1.ClusterImage{} + err = fakeClient.Get(s.ctx, req.NamespacedName, finalImage) + require.NoError(s.T(), err) + assert.Equal(s.T(), shared.STATUS_FAILED, finalImage.Status.Progress) + assert.Equal(s.T(), 3, finalImage.Status.RetryCount) +} diff --git a/internal/shared/definitions_test.go b/internal/shared/definitions_test.go new file mode 100644 index 0000000..b9f4ce9 --- /dev/null +++ b/internal/shared/definitions_test.go @@ -0,0 +1,643 @@ +package shared + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" +) + +// TestScenario represents a test scenario classification +type TestScenario string + +const ( + ScenarioGood TestScenario = "good" + ScenarioNotGood TestScenario = "not_good" + ScenarioReallyBad TestScenario = "really_bad" +) + +// DefinitionsTestSuite tests the definitions and utility functions +type DefinitionsTestSuite struct { + suite.Suite +} + +func TestDefinitionsTestSuite(t 
*testing.T) { + suite.Run(t, new(DefinitionsTestSuite)) +} + +// TestNormalizeImageName tests the NormalizeImageName function with matrix strategy +func (s *DefinitionsTestSuite) TestNormalizeImageName() { + testCases := []struct { + name string + scenario TestScenario + input string + expected string + }{ + // Good scenarios - standard image names + { + name: "simple image name", + scenario: ScenarioGood, + input: "nginx", + expected: "nginx", + }, + { + name: "image with tag", + scenario: ScenarioGood, + input: "nginx:latest", + expected: "nginx-latest", + }, + { + name: "image with version tag", + scenario: ScenarioGood, + input: "nginx:1.21.0", + expected: "nginx-1.21.0", + }, + { + name: "full registry path", + scenario: ScenarioGood, + input: "quay.io/cilium/cilium:v1.18.4", + expected: "quay.io-cilium-cilium-v1.18.4", + }, + { + name: "ghcr registry", + scenario: ScenarioGood, + input: "ghcr.io/owner/repo:v1.0.0", + expected: "ghcr.io-owner-repo-v1.0.0", + }, + + // Not good scenarios - unusual but valid formats + { + name: "image with SHA digest", + scenario: ScenarioNotGood, + input: "nginx@sha256:abc123def456", + expected: "nginx-sha256-abc123def456", + }, + { + name: "image with tag and SHA", + scenario: ScenarioNotGood, + input: "quay.io/cilium/cilium:v1.18.4@sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + expected: "quay.io-cilium-cilium-v1.18.4-sha256-49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + }, + { + name: "multiple colons in path", + scenario: ScenarioNotGood, + input: "registry:5000/image:tag", + expected: "registry-5000-image-tag", + }, + + // Really bad scenarios - edge cases and potential problems + { + name: "empty string", + scenario: ScenarioReallyBad, + input: "", + expected: "", + }, + { + name: "only special characters", + scenario: ScenarioReallyBad, + input: ":///@", + expected: "", + }, + { + name: "multiple consecutive special chars", + scenario: ScenarioReallyBad, + input: 
"image:::tag", + expected: "image-tag", + }, + { + name: "leading special characters", + scenario: ScenarioReallyBad, + input: "//image:tag", + expected: "image-tag", + }, + { + name: "trailing special characters", + scenario: ScenarioReallyBad, + input: "image:tag//", + expected: "image-tag", + }, + { + name: "spaces in name", + scenario: ScenarioReallyBad, + input: "image name:tag", + expected: "image-name-tag", + }, + { + name: "unicode characters", + scenario: ScenarioReallyBad, + input: "image:tag-日本語", + expected: "image-tag-日本語", + }, + { + name: "very long image name", + scenario: ScenarioReallyBad, + input: "registry.example.com/very/long/path/to/image:tag@sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890", + expected: "registry.example.com-very-long-path-to-image-tag-sha256-abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890", + }, + { + name: "query string characters", + scenario: ScenarioReallyBad, + input: "image?foo=bar&baz=qux", + expected: "image-foo-bar-baz-qux", + }, + { + name: "brackets and special chars", + scenario: ScenarioReallyBad, + input: "image[tag]{version}", + expected: "image-tag-version", + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + result := NormalizeImageName(tc.input) + assert.Equal(s.T(), tc.expected, result, "Scenario: %s", tc.scenario) + }) + } +} + +// TestRemoveDuplicates tests duplicate removal functionality +func (s *DefinitionsTestSuite) TestRemoveDuplicates() { + testCases := []struct { + name string + scenario TestScenario + input ContainersList + expected int + }{ + // Good scenarios + { + name: "no duplicates", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + {Image: "redis", Tag: "6", FullName: "redis:6"}, + }, + }, + expected: 2, + }, + { + name: "with duplicates", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: 
"latest", FullName: "nginx:latest"}, + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + {Image: "redis", Tag: "6", FullName: "redis:6"}, + }, + }, + expected: 2, + }, + + // Not good scenarios + { + name: "same image different tags", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + {Image: "nginx", Tag: "1.21", FullName: "nginx:1.21"}, + }, + }, + expected: 2, // Should keep both + }, + { + name: "same image with and without SHA", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + {Image: "nginx", Tag: "latest", Sha: "sha256:abc", FullName: "nginx:latest@sha256:abc"}, + }, + }, + expected: 2, // Different because of SHA + }, + + // Really bad scenarios + { + name: "empty list", + scenario: ScenarioReallyBad, + input: ContainersList{Containers: []Container{}}, + expected: 0, + }, + { + name: "all duplicates", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + {Image: "nginx", Tag: "latest", FullName: "nginx:latest"}, + }, + }, + expected: 1, + }, + { + name: "nil containers", + scenario: ScenarioReallyBad, + input: ContainersList{Containers: nil}, + expected: 0, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + result := RemoveDuplicates(tc.input) + assert.Len(s.T(), result.Containers, tc.expected, "Scenario: %s", tc.scenario) + }) + } +} + +// TestRemoveExcludedImages tests image exclusion functionality +func (s *DefinitionsTestSuite) TestRemoveExcludedImages() { + testCases := []struct { + name string + scenario TestScenario + input ContainersList + excludes []string + expected int + }{ + // Good scenarios + { + name: "no exclusions", + scenario: ScenarioGood, + input: ContainersList{ + 
Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + {Image: "redis", Tag: "6"}, + }, + }, + excludes: []string{}, + expected: 2, + }, + { + name: "exclude one image", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + {Image: "redis", Tag: "6"}, + }, + }, + excludes: []string{"nginx"}, + expected: 1, + }, + { + name: "exclude by registry", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "gcr.io/google/nginx", Tag: "latest"}, + {Image: "docker.io/library/redis", Tag: "6"}, + }, + }, + excludes: []string{"gcr.io"}, + expected: 1, + }, + + // Not good scenarios + { + name: "case insensitive exclusion", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "NGINX", Tag: "latest"}, + {Image: "Redis", Tag: "6"}, + }, + }, + excludes: []string{"nginx"}, + expected: 1, // Should exclude NGINX + }, + { + name: "partial match exclusion", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "my-nginx-custom", Tag: "latest"}, + {Image: "redis", Tag: "6"}, + }, + }, + excludes: []string{"nginx"}, + expected: 1, // Should exclude my-nginx-custom + }, + + // Really bad scenarios + { + name: "exclude all images", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + {Image: "redis", Tag: "6"}, + }, + }, + excludes: []string{"nginx", "redis"}, + expected: 0, + }, + { + name: "empty exclude list on empty containers", + scenario: ScenarioReallyBad, + input: ContainersList{Containers: []Container{}}, + excludes: []string{}, + expected: 0, + }, + { + name: "exclude with empty string", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + }, + }, + excludes: []string{""}, + expected: 0, // Empty string matches all + }, + } + + for _, tc := range testCases { + 
s.Run(tc.name, func() { + result := RemoveExcludedImages(tc.input, tc.excludes) + assert.Len(s.T(), result.Containers, tc.expected, "Scenario: %s", tc.scenario) + }) + } +} + +// TestIncludeOnlyImages tests image inclusion filtering +func (s *DefinitionsTestSuite) TestIncludeOnlyImages() { + testCases := []struct { + name string + scenario TestScenario + input ContainersList + includes []string + expected int + }{ + // Good scenarios + { + name: "include specific image", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + {Image: "redis", Tag: "6"}, + {Image: "postgres", Tag: "14"}, + }, + }, + includes: []string{"nginx"}, + expected: 1, + }, + { + name: "include multiple images", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + {Image: "redis", Tag: "6"}, + {Image: "postgres", Tag: "14"}, + }, + }, + includes: []string{"nginx", "redis"}, + expected: 2, + }, + + // Not good scenarios + { + name: "include by partial match", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "my-nginx-app", Tag: "latest"}, + {Image: "nginx-proxy", Tag: "v1"}, + {Image: "redis", Tag: "6"}, + }, + }, + includes: []string{"nginx"}, + expected: 2, + }, + + // Really bad scenarios + { + name: "include non-existent image", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + }, + }, + includes: []string{"nonexistent"}, + expected: 0, + }, + { + name: "empty includes list", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", Tag: "latest"}, + }, + }, + includes: []string{}, + expected: 0, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + result := IncludeOnlyImages(tc.input, tc.includes) + assert.Len(s.T(), result.Containers, tc.expected, "Scenario: %s", tc.scenario) + }) + } +} + +// 
TestFilterOnlyFromNamespaces tests namespace filtering +func (s *DefinitionsTestSuite) TestFilterOnlyFromNamespaces() { + testCases := []struct { + name string + scenario TestScenario + input ContainersList + namespaces []string + expected int + }{ + // Good scenarios + { + name: "filter single namespace", + scenario: ScenarioGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + {Image: "redis", ImageNamespace: "kube-system"}, + }, + }, + namespaces: []string{"default"}, + expected: 1, + }, + + // Not good scenarios + { + name: "filter multiple namespaces", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + {Image: "redis", ImageNamespace: "kube-system"}, + {Image: "postgres", ImageNamespace: "database"}, + }, + }, + namespaces: []string{"default", "database"}, + expected: 2, + }, + + // Really bad scenarios + { + name: "filter non-existent namespace", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + }, + }, + namespaces: []string{"nonexistent"}, + expected: 0, + }, + { + name: "empty namespace filter", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + }, + }, + namespaces: []string{}, + expected: 0, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + result := FilterOnlyFromNamespaces(tc.input, tc.namespaces) + assert.Len(s.T(), result.Containers, tc.expected, "Scenario: %s", tc.scenario) + }) + } +} + +// TestFilterOutWholeNamespaces tests namespace exclusion +func (s *DefinitionsTestSuite) TestFilterOutWholeNamespaces() { + testCases := []struct { + name string + scenario TestScenario + input ContainersList + namespaces []string + expected int + }{ + // Good scenarios + { + name: "exclude kube-system", + scenario: ScenarioGood, + input: 
ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + {Image: "coredns", ImageNamespace: "kube-system"}, + {Image: "redis", ImageNamespace: "apps"}, + }, + }, + namespaces: []string{"kube-system"}, + expected: 2, + }, + + // Not good scenarios + { + name: "exclude multiple system namespaces", + scenario: ScenarioNotGood, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + {Image: "coredns", ImageNamespace: "kube-system"}, + {Image: "cilium", ImageNamespace: "kube-system"}, + {Image: "local-path", ImageNamespace: "local-path-storage"}, + }, + }, + namespaces: []string{"kube-system", "local-path-storage"}, + expected: 1, + }, + + // Really bad scenarios + { + name: "exclude all namespaces", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + {Image: "redis", ImageNamespace: "apps"}, + }, + }, + namespaces: []string{"default", "apps"}, + expected: 0, + }, + { + name: "empty exclusion list", + scenario: ScenarioReallyBad, + input: ContainersList{ + Containers: []Container{ + {Image: "nginx", ImageNamespace: "default"}, + }, + }, + namespaces: []string{}, + expected: 1, // No exclusions = keep all + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + result := FilterOutWholeNamespaces(tc.input, tc.namespaces) + assert.Len(s.T(), result.Containers, tc.expected, "Scenario: %s", tc.scenario) + }) + } +} + +// TestContainerStruct tests Container struct behavior +func (s *DefinitionsTestSuite) TestContainerStruct() { + s.Run("full container with all fields", func() { + c := Container{ + Image: "quay.io/cilium/cilium", + Tag: "v1.18.4", + Sha: "sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + FullName: "quay.io/cilium/cilium:v1.18.4@sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + ImageNamespace: "kube-system", + } + + assert.Equal(s.T(), 
"quay.io/cilium/cilium", c.Image) + assert.Equal(s.T(), "v1.18.4", c.Tag) + assert.Contains(s.T(), c.Sha, "sha256:") + assert.Contains(s.T(), c.FullName, "@") + }) + + s.Run("container equality for deduplication", func() { + c1 := Container{Image: "nginx", Tag: "latest", FullName: "nginx:latest"} + c2 := Container{Image: "nginx", Tag: "latest", FullName: "nginx:latest"} + c3 := Container{Image: "nginx", Tag: "1.21", FullName: "nginx:1.21"} + + assert.Equal(s.T(), c1, c2) + assert.NotEqual(s.T(), c1, c3) + }) +} + +// TestConstants tests that constants are defined correctly +func (s *DefinitionsTestSuite) TestConstants() { + // Status constants + assert.Equal(s.T(), "PENDING", STATUS_PENDING) + assert.Equal(s.T(), "STARTING", STATUS_STARTING) + assert.Equal(s.T(), "RETRYING", STATUS_RETRYING) + assert.Equal(s.T(), "RUNNING", STATUS_RUNNING) + assert.Equal(s.T(), "FAILED", STATUS_FAILED) + assert.Equal(s.T(), "COMPLETED", STATUS_SUCCESS) + assert.Equal(s.T(), "PRESENT", STATUS_PRESENT) + + // Storage constants + assert.Equal(s.T(), "S3", STORAGE_S3) + assert.Equal(s.T(), "FILE", STORAGE_FILE) +} + +// TestBackupJobImage tests the BACKUP_JOB_IMAGE initialization +func (s *DefinitionsTestSuite) TestBackupJobImage() { + require.NotEmpty(s.T(), BACKUP_JOB_IMAGE) + assert.Contains(s.T(), BACKUP_JOB_IMAGE, "kubernetes-images-sync-worker") +} diff --git a/internal/shared/jobs_test.go b/internal/shared/jobs_test.go new file mode 100644 index 0000000..e9e9331 --- /dev/null +++ b/internal/shared/jobs_test.go @@ -0,0 +1,547 @@ +package shared + +import ( + "testing" + + raczylocomv1 "github.com/lukaszraczylo/kubernetes-images-sync-operator/api/raczylo.com/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// JobsTestSuite tests job creation and related functionality +type JobsTestSuite struct { + suite.Suite +} + +func 
TestJobsTestSuite(t *testing.T) { + suite.Run(t, new(JobsTestSuite)) +} + +// TestCreateJob tests the CreateJob function with various scenarios +func (s *JobsTestSuite) TestCreateJob() { + testCases := []struct { + name string + scenario TestScenario + params JobParams + expectJobName string + expectNamespace string + expectImage string + expectAnnotations map[string]string + expectSecrets int + expectEnvVars int + }{ + // Good scenarios + { + name: "basic job creation", + scenario: ScenarioGood, + params: JobParams{ + Name: "test-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo hello"}, + }, + expectJobName: "test-job", + expectNamespace: "default", + expectImage: "worker:latest", + expectSecrets: 0, + expectEnvVars: 0, + }, + { + name: "job with annotations", + scenario: ScenarioGood, + params: JobParams{ + Name: "annotated-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo hello"}, + Annotations: map[string]string{ + "key1": "value1", + "key2": "value2", + }, + }, + expectJobName: "annotated-job", + expectNamespace: "default", + expectImage: "worker:latest", + expectAnnotations: map[string]string{ + "key1": "value1", + "key2": "value2", + }, + expectSecrets: 0, + expectEnvVars: 0, + }, + { + name: "job with image pull secrets", + scenario: ScenarioGood, + params: JobParams{ + Name: "secret-job", + Namespace: "default", + Image: "private-registry/image:latest", + Commands: []string{"echo hello"}, + ImagePullSecrets: []corev1.LocalObjectReference{ + {Name: "my-registry-secret"}, + }, + }, + expectJobName: "secret-job", + expectNamespace: "default", + expectImage: "private-registry/image:latest", + expectSecrets: 1, + expectEnvVars: 0, + }, + { + name: "job with multiple secrets", + scenario: ScenarioGood, + params: JobParams{ + Name: "multi-secret-job", + Namespace: "default", + Image: "private-registry/image:latest", + Commands: []string{"echo hello"}, + ImagePullSecrets: 
[]corev1.LocalObjectReference{ + {Name: "secret1"}, + {Name: "secret2"}, + {Name: "secret3"}, + }, + }, + expectJobName: "multi-secret-job", + expectNamespace: "default", + expectImage: "private-registry/image:latest", + expectSecrets: 3, + expectEnvVars: 0, + }, + { + name: "job with environment variables", + scenario: ScenarioGood, + params: JobParams{ + Name: "env-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo $MY_VAR"}, + EnvVars: []corev1.EnvVar{ + {Name: "MY_VAR", Value: "my-value"}, + {Name: "AWS_REGION", Value: "us-east-1"}, + }, + }, + expectJobName: "env-job", + expectNamespace: "default", + expectImage: "worker:latest", + expectSecrets: 0, + expectEnvVars: 2, + }, + + // Not good scenarios + { + name: "job with backoff limit", + scenario: ScenarioNotGood, + params: JobParams{ + Name: "retry-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"might-fail"}, + BackoffLimit: int32Ptr(3), + }, + expectJobName: "retry-job", + expectNamespace: "default", + expectImage: "worker:latest", + expectSecrets: 0, + expectEnvVars: 0, + }, + { + name: "job with TTL after finished", + scenario: ScenarioNotGood, + params: JobParams{ + Name: "ttl-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo done"}, + TTLSecondsAfterFinished: int32Ptr(300), + }, + expectJobName: "ttl-job", + expectNamespace: "default", + expectImage: "worker:latest", + expectSecrets: 0, + expectEnvVars: 0, + }, + + // Really bad scenarios - edge cases + { + name: "job with empty commands", + scenario: ScenarioReallyBad, + params: JobParams{ + Name: "empty-cmd-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{}, + }, + expectJobName: "empty-cmd-job", + expectNamespace: "default", + expectImage: "worker:latest", + expectSecrets: 0, + expectEnvVars: 0, + }, + { + name: "job with very long name", + scenario: ScenarioReallyBad, + params: JobParams{ + Name: 
"this-is-a-very-long-job-name-that-might-cause-issues-in-kubernetes-because-names-have-limits", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo hello"}, + }, + expectJobName: "this-is-a-very-long-job-name-that-might-cause-issues-in-kubernetes-because-names-have-limits", + expectNamespace: "default", + expectImage: "worker:latest", + expectSecrets: 0, + expectEnvVars: 0, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + job := CreateJob(tc.params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + + require.NotNil(s.T(), job, "Job should not be nil") + + // Verify job metadata + assert.Equal(s.T(), tc.expectJobName, job.ObjectMeta.Name) + assert.Equal(s.T(), tc.expectNamespace, job.ObjectMeta.Namespace) + + // Verify labels + assert.Equal(s.T(), "image-export", job.ObjectMeta.Labels["app"]) + assert.Equal(s.T(), "image-export", job.Spec.Template.ObjectMeta.Labels["app"]) + + // Verify annotations if expected + if tc.expectAnnotations != nil { + for k, v := range tc.expectAnnotations { + assert.Equal(s.T(), v, job.ObjectMeta.Annotations[k]) + assert.Equal(s.T(), v, job.Spec.Template.ObjectMeta.Annotations[k]) + } + } + + // Verify pod template + podSpec := job.Spec.Template.Spec + require.Len(s.T(), podSpec.Containers, 1, "Should have exactly one container") + + container := podSpec.Containers[0] + assert.Equal(s.T(), "exporter", container.Name) + assert.Equal(s.T(), tc.expectImage, container.Image) + assert.True(s.T(), container.TTY) + + // Verify restart policy + assert.Equal(s.T(), corev1.RestartPolicyOnFailure, podSpec.RestartPolicy) + + // Verify secrets + assert.Len(s.T(), podSpec.ImagePullSecrets, tc.expectSecrets) + assert.Len(s.T(), podSpec.Volumes, tc.expectSecrets) + assert.Len(s.T(), container.VolumeMounts, tc.expectSecrets) + + // Verify environment variables + assert.Len(s.T(), container.Env, tc.expectEnvVars) + + // Verify security context (privileged for podman) + require.NotNil(s.T(), 
container.SecurityContext) + require.NotNil(s.T(), container.SecurityContext.Privileged) + assert.True(s.T(), *container.SecurityContext.Privileged) + }) + } +} + +// TestCreateJobWithOwnerReferences tests owner reference handling +func (s *JobsTestSuite) TestCreateJobWithOwnerReferences() { + ownerRefs := []metav1.OwnerReference{ + { + APIVersion: "raczylo.com/v1", + Kind: "ClusterImage", + Name: "test-image", + UID: "test-uid-12345", + }, + } + + params := JobParams{ + Name: "owned-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo hello"}, + OwnerReferences: ownerRefs, + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + + require.Len(s.T(), job.ObjectMeta.OwnerReferences, 1) + assert.Equal(s.T(), "ClusterImage", job.ObjectMeta.OwnerReferences[0].Kind) + assert.Equal(s.T(), "test-image", job.ObjectMeta.OwnerReferences[0].Name) +} + +// TestCreateJobCommands tests command concatenation +func (s *JobsTestSuite) TestCreateJobCommands() { + testCases := []struct { + name string + commands []string + expectedArgsLen int + expectedJoined string + }{ + { + name: "single command", + commands: []string{"echo hello"}, + expectedArgsLen: 3, // /bin/bash, -c, "command" + expectedJoined: "echo hello", + }, + { + name: "multiple commands", + commands: []string{"echo hello", "echo world"}, + expectedArgsLen: 3, + expectedJoined: "echo hello && echo world", + }, + { + name: "complex podman commands", + commands: []string{ + "podman pull nginx:latest", + "podman save --quiet -o /tmp/nginx.tar nginx:latest", + "./worker export /tmp/nginx.tar s3://bucket/path", + }, + expectedArgsLen: 3, + expectedJoined: "podman pull nginx:latest && podman save --quiet -o /tmp/nginx.tar nginx:latest && ./worker export /tmp/nginx.tar s3://bucket/path", + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + params := JobParams{ + Name: "cmd-test", + Namespace: "default", + Image: "worker:latest", + 
Commands: tc.commands, + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + + container := job.Spec.Template.Spec.Containers[0] + assert.Len(s.T(), container.Args, tc.expectedArgsLen) + assert.Equal(s.T(), "/bin/bash", container.Args[0]) + assert.Equal(s.T(), "-c", container.Args[1]) + assert.Equal(s.T(), tc.expectedJoined, container.Args[2]) + }) + } +} + +// TestCreateJobBackoffAndTTL tests backoff limit and TTL settings +func (s *JobsTestSuite) TestCreateJobBackoffAndTTL() { + backoff := int32(5) + ttl := int32(600) + + params := JobParams{ + Name: "backoff-ttl-job", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo hello"}, + BackoffLimit: &backoff, + TTLSecondsAfterFinished: &ttl, + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + + require.NotNil(s.T(), job.Spec.BackoffLimit) + assert.Equal(s.T(), int32(5), *job.Spec.BackoffLimit) + + require.NotNil(s.T(), job.Spec.TTLSecondsAfterFinished) + assert.Equal(s.T(), int32(600), *job.Spec.TTLSecondsAfterFinished) +} + +// TestSetupS3Params tests S3 parameter generation +func (s *JobsTestSuite) TestSetupS3Params() { + testCases := []struct { + name string + scenario TestScenario + config raczylocomv1.ClusterImageStorageS3 + expectContains []string + expectNotIn []string + }{ + // Good scenarios + { + name: "basic credentials", + scenario: ScenarioGood, + config: raczylocomv1.ClusterImageStorageS3{ + Bucket: "my-bucket", + Region: "us-east-1", + AccessKey: "AKIAIOSFODNN7EXAMPLE", + SecretKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + }, + expectContains: []string{ + "--aws_access_key_id='AKIAIOSFODNN7EXAMPLE'", + "--aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'", + }, + expectNotIn: []string{"--use_role", "--use_current_role"}, + }, + { + name: "use current role (IRSA)", + scenario: ScenarioGood, + config: raczylocomv1.ClusterImageStorageS3{ + Bucket: "my-bucket", + Region: 
"us-east-1", + UseRole: true, + }, + expectContains: []string{"--use_current_role"}, + expectNotIn: []string{"--aws_access_key_id", "--aws_secret_access_key"}, + }, + { + name: "use specific role ARN", + scenario: ScenarioGood, + config: raczylocomv1.ClusterImageStorageS3{ + Bucket: "my-bucket", + Region: "us-east-1", + UseRole: true, + RoleARN: "arn:aws:iam::123456789:role/MyRole", + }, + expectContains: []string{ + "--use_role", + "--role_name='arn:aws:iam::123456789:role/MyRole'", + }, + expectNotIn: []string{"--aws_access_key_id", "--use_current_role"}, + }, + + // Not good scenarios + { + name: "with custom endpoint (MinIO)", + scenario: ScenarioNotGood, + config: raczylocomv1.ClusterImageStorageS3{ + Bucket: "my-bucket", + Region: "us-east-1", + AccessKey: "minioadmin", + SecretKey: "minioadmin", + Endpoint: "http://minio.local:9000", + }, + expectContains: []string{ + "--endpoint_url='http://minio.local:9000'", + "--aws_access_key_id='minioadmin'", + }, + }, + + // Really bad scenarios + { + name: "empty credentials with UseRole false", + scenario: ScenarioReallyBad, + config: raczylocomv1.ClusterImageStorageS3{ + Bucket: "my-bucket", + Region: "us-east-1", + }, + expectContains: []string{ + "--aws_access_key_id=''", + "--aws_secret_access_key=''", + }, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + params := SetupS3Params(tc.config) + + // Check expected parameters are present + for _, expected := range tc.expectContains { + found := false + for _, param := range params { + if param == expected { + found = true + break + } + } + assert.True(s.T(), found, "Expected parameter not found: %s", expected) + } + + // Check unexpected parameters are not present + for _, notExpected := range tc.expectNotIn { + for _, param := range params { + assert.NotContains(s.T(), param, notExpected, "Unexpected parameter found: %s", notExpected) + } + } + }) + } +} + +// TestJobParamsDefaults tests JobParams default handling +func (s *JobsTestSuite) 
TestJobParamsDefaults() { + s.Run("nil backoff limit", func() { + params := JobParams{ + Name: "test", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo"}, + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + assert.Nil(s.T(), job.Spec.BackoffLimit) + }) + + s.Run("nil TTL", func() { + params := JobParams{ + Name: "test", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo"}, + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + assert.Nil(s.T(), job.Spec.TTLSecondsAfterFinished) + }) + + s.Run("empty service account uses env var", func() { + // This test verifies that when ServiceAccount is empty, + // the job will use POD_SERVICE_ACCOUNT env var + params := JobParams{ + Name: "test", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo"}, + ServiceAccount: "", // Empty + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + // Service account will be set from environment variable POD_SERVICE_ACCOUNT + // In tests, this will be empty, so we just verify the job is created + require.NotNil(s.T(), job) + }) +} + +// TestSecretVolumeMounting tests that secrets are properly mounted +func (s *JobsTestSuite) TestSecretVolumeMounting() { + secrets := []corev1.LocalObjectReference{ + {Name: "docker-registry"}, + {Name: "gcr-json-key"}, + {Name: "ecr-credentials"}, + } + + params := JobParams{ + Name: "secret-mount-test", + Namespace: "default", + Image: "worker:latest", + Commands: []string{"echo"}, + ImagePullSecrets: secrets, + } + + job := CreateJob(params, func(raczylocomv1.ClusterImageExport) []string { return nil }) + podSpec := job.Spec.Template.Spec + container := podSpec.Containers[0] + + // Verify volumes are created + require.Len(s.T(), podSpec.Volumes, 3) + for i, vol := range podSpec.Volumes { + assert.Equal(s.T(), secrets[i].Name, 
vol.VolumeSource.Secret.SecretName) + assert.Contains(s.T(), vol.Name, "secret-") + } + + // Verify volume mounts + require.Len(s.T(), container.VolumeMounts, 3) + for i, mount := range container.VolumeMounts { + assert.Contains(s.T(), mount.MountPath, ".docker-secret-") + assert.True(s.T(), mount.ReadOnly) + assert.Contains(s.T(), mount.Name, "secret-") + // Verify index is correct + expectedIndex := i + assert.Equal(s.T(), "/home/runner/.docker-secret-"+string(rune('0'+expectedIndex)), mount.MountPath) + } +} + +// Helper function +func int32Ptr(i int32) *int32 { + return &i +} diff --git a/internal/shared/k8s_test.go b/internal/shared/k8s_test.go new file mode 100644 index 0000000..938dd04 --- /dev/null +++ b/internal/shared/k8s_test.go @@ -0,0 +1,1219 @@ +package shared + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +// K8sTestSuite tests Kubernetes-related utility functions +type K8sTestSuite struct { + suite.Suite +} + +func TestK8sTestSuite(t *testing.T) { + suite.Run(t, new(K8sTestSuite)) +} + +// ProcessContainerNameTestCase represents a test case for ProcessContainerName +type ProcessContainerNameTestCase struct { + name string + scenario TestScenario + input string + expectedImage string + expectedTag string + expectedSha string + expectError bool + errorContains string +} + +// TestProcessContainerName tests the ProcessContainerName function with comprehensive scenarios +func (s *K8sTestSuite) TestProcessContainerName() { + testCases := []ProcessContainerNameTestCase{ + // ========== GOOD SCENARIOS ========== + // Standard Docker Hub images + { + name: "simple image 
name defaults to latest", + scenario: ScenarioGood, + input: "nginx", + expectedImage: "nginx", + expectedTag: "latest", + expectedSha: "", + expectError: false, + }, + { + name: "image with latest tag", + scenario: ScenarioGood, + input: "nginx:latest", + expectedImage: "nginx", + expectedTag: "latest", + expectedSha: "", + expectError: false, + }, + { + name: "image with version tag", + scenario: ScenarioGood, + input: "nginx:1.21.0", + expectedImage: "nginx", + expectedTag: "1.21.0", + expectedSha: "", + expectError: false, + }, + { + name: "image with semver tag", + scenario: ScenarioGood, + input: "redis:6.2.6", + expectedImage: "redis", + expectedTag: "6.2.6", + expectedSha: "", + expectError: false, + }, + + // Images with registry prefix + { + name: "gcr.io registry", + scenario: ScenarioGood, + input: "gcr.io/google-containers/pause:3.2", + expectedImage: "gcr.io/google-containers/pause", + expectedTag: "3.2", + expectedSha: "", + expectError: false, + }, + { + name: "ghcr.io registry", + scenario: ScenarioGood, + input: "ghcr.io/owner/repo:v1.0.0", + expectedImage: "ghcr.io/owner/repo", + expectedTag: "v1.0.0", + expectedSha: "", + expectError: false, + }, + { + name: "quay.io registry", + scenario: ScenarioGood, + input: "quay.io/coreos/etcd:v3.5.0", + expectedImage: "quay.io/coreos/etcd", + expectedTag: "v3.5.0", + expectedSha: "", + expectError: false, + }, + { + name: "registry.k8s.io", + scenario: ScenarioGood, + input: "registry.k8s.io/pause:3.9", + expectedImage: "registry.k8s.io/pause", + expectedTag: "3.9", + expectedSha: "", + expectError: false, + }, + { + name: "docker.io explicit registry", + scenario: ScenarioGood, + input: "docker.io/library/nginx:latest", + expectedImage: "docker.io/library/nginx", + expectedTag: "latest", + expectedSha: "", + expectError: false, + }, + + // Private registry with port + { + name: "private registry with port", + scenario: ScenarioGood, + input: "myregistry.local:5000/myimage:v1", + expectedImage: 
"myregistry.local", + expectedTag: "5000/myimage:v1", // This is the current behavior + expectedSha: "", + expectError: false, + }, + + // ========== NOT GOOD SCENARIOS ========== + // SHA-only references (no tag) + { + name: "image with sha256 digest only", + scenario: ScenarioNotGood, + input: "nginx@sha256:abc123def456789012345678901234567890123456789012345678901234", + expectedImage: "nginx", + expectedTag: "", + expectedSha: "sha256:abc123def456789012345678901234567890123456789012345678901234", + expectError: false, + }, + { + name: "registry image with sha only", + scenario: ScenarioNotGood, + input: "gcr.io/distroless/static@sha256:abcdef1234567890", + expectedImage: "gcr.io/distroless/static", + expectedTag: "", + expectedSha: "sha256:abcdef1234567890", + expectError: false, + }, + + // Tag + SHA references (pinned images) + { + name: "cilium with tag and sha - real world example", + scenario: ScenarioNotGood, + input: "quay.io/cilium/cilium:v1.18.4@sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + expectedImage: "quay.io/cilium/cilium", + expectedTag: "v1.18.4", + expectedSha: "sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + expectError: false, + }, + { + name: "distroless with tag and sha", + scenario: ScenarioNotGood, + input: "gcr.io/distroless/static:nonroot@sha256:abc123", + expectedImage: "gcr.io/distroless/static", + expectedTag: "nonroot", + expectedSha: "sha256:abc123", + expectError: false, + }, + + // Complex nested paths + { + name: "deeply nested registry path", + scenario: ScenarioNotGood, + input: "us-docker.pkg.dev/project/repo/subdir/image:tag", + expectedImage: "us-docker.pkg.dev/project/repo/subdir/image", + expectedTag: "tag", + expectedSha: "", + expectError: false, + }, + + // ========== REALLY BAD SCENARIOS ========== + // Empty and invalid inputs + { + name: "empty string", + scenario: ScenarioReallyBad, + input: "", + expectedImage: "", + expectedTag: "", + expectedSha: "", + 
expectError: true, + errorContains: "image name is required", + }, + { + name: "only colon", + scenario: ScenarioReallyBad, + input: ":", + expectedImage: "", + expectedTag: "", + expectedSha: "", + expectError: true, + errorContains: "image name is required", + }, + { + name: "only at sign", + scenario: ScenarioReallyBad, + input: "@", + expectedImage: "", + expectedTag: "", + expectedSha: "", + expectError: true, + errorContains: "invalid SHA format", + }, + + // Invalid SHA formats + { + name: "invalid sha format - missing colon", + scenario: ScenarioReallyBad, + input: "nginx@sha256abc123", + expectedImage: "", + expectedTag: "", + expectedSha: "", + expectError: true, + errorContains: "invalid SHA format", + }, + { + name: "invalid sha format - wrong algorithm", + scenario: ScenarioReallyBad, + input: "nginx@md5:abc123", + expectedImage: "", + expectedTag: "", + expectedSha: "", + expectError: true, + errorContains: "invalid SHA format", + }, + { + name: "multiple @ symbols", + scenario: ScenarioReallyBad, + input: "nginx@sha256:abc@sha256:def", + expectedImage: "", + expectedTag: "", + expectedSha: "", + expectError: true, + errorContains: "invalid container name format", + }, + + // Edge cases for Kubernetes volatility + { + name: "k8s pause image", + scenario: ScenarioGood, + input: "registry.k8s.io/pause:3.9", + expectedImage: "registry.k8s.io/pause", + expectedTag: "3.9", + expectedSha: "", + expectError: false, + }, + { + name: "coredns image", + scenario: ScenarioGood, + input: "registry.k8s.io/coredns/coredns:v1.11.1", + expectedImage: "registry.k8s.io/coredns/coredns", + expectedTag: "v1.11.1", + expectedSha: "", + expectError: false, + }, + { + name: "etcd with sha pinning", + scenario: ScenarioNotGood, + input: "registry.k8s.io/etcd:3.5.12-0@sha256:abc123", + expectedImage: "registry.k8s.io/etcd", + expectedTag: "3.5.12-0", + expectedSha: "sha256:abc123", + expectError: false, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + 
result, err := ProcessContainerName(tc.input) + + if tc.expectError { + require.Error(s.T(), err, "Scenario: %s - Expected error for input: %s", tc.scenario, tc.input) + if tc.errorContains != "" { + assert.Contains(s.T(), err.Error(), tc.errorContains) + } + } else { + require.NoError(s.T(), err, "Scenario: %s - Unexpected error for input: %s", tc.scenario, tc.input) + assert.Equal(s.T(), tc.expectedImage, result.Image, "Image mismatch") + assert.Equal(s.T(), tc.expectedTag, result.Tag, "Tag mismatch") + assert.Equal(s.T(), tc.expectedSha, result.Sha, "SHA mismatch") + + // Verify FullName is preserved + if tc.input != "" { + assert.Equal(s.T(), tc.input, result.FullName, "FullName should match input") + } + } + }) + } +} + +// TestProcessContainerNameCaching tests that the container cache works correctly +func (s *K8sTestSuite) TestProcessContainerNameCaching() { + // Clear the cache first by processing a unique image + uniqueImage := "test-cache-" + time.Now().Format("20060102150405") + + // First call - should not be cached + result1, err := ProcessContainerName(uniqueImage) + require.NoError(s.T(), err) + assert.Equal(s.T(), uniqueImage, result1.Image) + assert.Equal(s.T(), "latest", result1.Tag) // Defaults to latest + + // Second call - should be cached + result2, err := ProcessContainerName(uniqueImage) + require.NoError(s.T(), err) + assert.Equal(s.T(), result1, result2, "Cached result should match original") +} + +// TestProcessContainerNameConcurrency tests thread safety of ProcessContainerName +func (s *K8sTestSuite) TestProcessContainerNameConcurrency() { + images := []string{ + "nginx:latest", + "redis:6", + "postgres:14", + "mysql:8", + "mongo:5", + } + + done := make(chan bool, len(images)*10) + errors := make(chan error, len(images)*10) + + // Run 10 goroutines per image + for i := 0; i < 10; i++ { + for _, img := range images { + go func(image string) { + _, err := ProcessContainerName(image) + if err != nil { + errors <- err + } + done <- true + 
}(img) + } + } + + // Wait for all goroutines + for i := 0; i < len(images)*10; i++ { + <-done + } + + // Check for errors + close(errors) + for err := range errors { + s.T().Errorf("Concurrent access error: %v", err) + } +} + +// TestContainerCacheOperations tests the ContainerCache directly +func (s *K8sTestSuite) TestContainerCacheOperations() { + cache := &ContainerCache{ + cache: make(map[string]Container), + } + + // Test Set and Get + testContainer := Container{ + Image: "test-image", + Tag: "v1.0.0", + FullName: "test-image:v1.0.0", + } + + cache.Set("test-key", testContainer) + + retrieved, ok := cache.Get("test-key") + assert.True(s.T(), ok, "Should find cached container") + assert.Equal(s.T(), testContainer, retrieved) + + // Test Get non-existent key + _, ok = cache.Get("non-existent") + assert.False(s.T(), ok, "Should not find non-existent key") +} + +// TestProcessContainerWithContext tests context cancellation +func (s *K8sTestSuite) TestProcessContainerWithContext() { + ctx, cancel := context.WithCancel(context.Background()) + + // Test with active context + containersList := &ContainersList{} + err := processContainer(ctx, "nginx:latest", "default", containersList) + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + + // Test with cancelled context + cancel() + err = processContainer(ctx, "redis:6", "default", containersList) + assert.Error(s.T(), err) + assert.Equal(s.T(), context.Canceled, err) +} + +// TestKubernetesVolatilityScenarios tests scenarios specific to Kubernetes environment volatility +func (s *K8sTestSuite) TestKubernetesVolatilityScenarios() { + testCases := []struct { + name string + description string + input string + expectError bool + }{ + // Pod restart scenarios - same image, different instance + { + name: "pod_restart_same_image", + description: "When a pod restarts, the same image reference should parse identically", + input: "nginx:1.21", + expectError: false, + }, + // Rolling update 
scenarios + { + name: "rolling_update_new_tag", + description: "During rolling update, new tag version", + input: "myapp:v2.0.0", + expectError: false, + }, + // Image pull scenarios + { + name: "image_pull_backoff_recovery", + description: "Image that might have failed to pull initially", + input: "private-registry.io/secure/image:latest", + expectError: false, + }, + // Helm chart common patterns + { + name: "helm_chart_image_pattern", + description: "Common Helm chart image reference pattern", + input: "bitnami/postgresql:14.5.0-debian-11-r14", + expectError: false, + }, + // Operator-managed images + { + name: "operator_managed_image", + description: "Image managed by an operator with specific versioning", + input: "quay.io/prometheus/prometheus:v2.45.0", + expectError: false, + }, + // Init container images + { + name: "init_container_image", + description: "Common init container image", + input: "busybox:1.36", + expectError: false, + }, + // Sidecar images + { + name: "sidecar_envoy_proxy", + description: "Envoy sidecar proxy image", + input: "envoyproxy/envoy:v1.28.0", + expectError: false, + }, + // CSI driver images + { + name: "csi_driver_image", + description: "CSI driver image with complex path", + input: "registry.k8s.io/sig-storage/csi-provisioner:v3.6.0", + expectError: false, + }, + // Admission webhook images + { + name: "admission_webhook_image", + description: "Admission webhook controller image", + input: "k8s.gcr.io/ingress-nginx/kube-webhook-certgen:v1.3.0", + expectError: false, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + result, err := ProcessContainerName(tc.input) + + if tc.expectError { + assert.Error(s.T(), err, "Description: %s", tc.description) + } else { + assert.NoError(s.T(), err, "Description: %s", tc.description) + assert.NotEmpty(s.T(), result.Image, "Image should not be empty") + assert.Equal(s.T(), tc.input, result.FullName, "FullName should be preserved") + } + }) + } +} + +// 
TestImageParsingEdgeCases tests edge cases that might occur in real Kubernetes clusters +func (s *K8sTestSuite) TestImageParsingEdgeCases() { + s.Run("image with plus sign in tag", func() { + // Some images use + in tags (e.g., build metadata) + result, err := ProcessContainerName("myimage:v1.0.0+build123") + require.NoError(s.T(), err) + assert.Equal(s.T(), "myimage", result.Image) + assert.Equal(s.T(), "v1.0.0+build123", result.Tag) + }) + + s.Run("image with underscore in name", func() { + result, err := ProcessContainerName("my_custom_image:latest") + require.NoError(s.T(), err) + assert.Equal(s.T(), "my_custom_image", result.Image) + }) + + s.Run("image with dash in tag", func() { + result, err := ProcessContainerName("nginx:1.21-alpine") + require.NoError(s.T(), err) + assert.Equal(s.T(), "1.21-alpine", result.Tag) + }) + + s.Run("image with rc/beta tag", func() { + result, err := ProcessContainerName("kubernetes/kube-apiserver:v1.29.0-rc.0") + require.NoError(s.T(), err) + assert.Equal(s.T(), "v1.29.0-rc.0", result.Tag) + }) + + s.Run("image with only sha256 digest (no tag)", func() { + sha := "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + result, err := ProcessContainerName("alpine@" + sha) + require.NoError(s.T(), err) + assert.Equal(s.T(), "alpine", result.Image) + assert.Equal(s.T(), "", result.Tag) + assert.Equal(s.T(), sha, result.Sha) + }) + + s.Run("aws ecr image", func() { + result, err := ProcessContainerName("123456789.dkr.ecr.us-east-1.amazonaws.com/my-repo:latest") + require.NoError(s.T(), err) + assert.Equal(s.T(), "123456789.dkr.ecr.us-east-1.amazonaws.com/my-repo", result.Image) + assert.Equal(s.T(), "latest", result.Tag) + }) + + s.Run("azure acr image", func() { + result, err := ProcessContainerName("myregistry.azurecr.io/samples/nginx:v1") + require.NoError(s.T(), err) + assert.Equal(s.T(), "myregistry.azurecr.io/samples/nginx", result.Image) + assert.Equal(s.T(), "v1", result.Tag) + }) + + s.Run("google 
artifact registry", func() { + result, err := ProcessContainerName("us-central1-docker.pkg.dev/my-project/my-repo/my-image:tag") + require.NoError(s.T(), err) + assert.Equal(s.T(), "us-central1-docker.pkg.dev/my-project/my-repo/my-image", result.Image) + assert.Equal(s.T(), "tag", result.Tag) + }) +} + +// BenchmarkProcessContainerName benchmarks the ProcessContainerName function +func BenchmarkProcessContainerName(b *testing.B) { + images := []string{ + "nginx", + "nginx:latest", + "quay.io/cilium/cilium:v1.18.4@sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + "gcr.io/google-containers/pause:3.2", + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, img := range images { + _, _ = ProcessContainerName(img) + } + } +} + +// BenchmarkNormalizeImageName benchmarks the NormalizeImageName function +func BenchmarkNormalizeImageName(b *testing.B) { + images := []string{ + "nginx", + "nginx:latest", + "quay.io/cilium/cilium:v1.18.4@sha256:49d87af187eeeb9e9e3ec2bc6bd372261a0b5cb2d845659463ba7cc10fe9e45f", + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, img := range images { + _ = NormalizeImageName(img) + } + } +} + +// ========== K8s Resource Wrapper Tests ========== + +// TestDeploymentWrapper tests the DeploymentWrapper GetPodSpec method +func (s *K8sTestSuite) TestDeploymentWrapper() { + deployment := appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main", Image: "nginx:latest"}, + }, + InitContainers: []corev1.Container{ + {Name: "init", Image: "busybox:1.36"}, + }, + }, + }, + }, + } + + wrapper := (*DeploymentWrapper)(&deployment) + podSpec := wrapper.GetPodSpec() + + require.NotNil(s.T(), podSpec) + assert.Len(s.T(), podSpec.Containers, 1) + assert.Equal(s.T(), "nginx:latest", podSpec.Containers[0].Image) + assert.Len(s.T(), podSpec.InitContainers, 1) + assert.Equal(s.T(), "busybox:1.36", 
podSpec.InitContainers[0].Image) +} + +// TestJobWrapper tests the JobWrapper GetPodSpec method +func (s *K8sTestSuite) TestJobWrapper() { + job := batchv1.Job{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "worker", Image: "worker:v1.0"}, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + }, + } + + wrapper := (*JobWrapper)(&job) + podSpec := wrapper.GetPodSpec() + + require.NotNil(s.T(), podSpec) + assert.Len(s.T(), podSpec.Containers, 1) + assert.Equal(s.T(), "worker:v1.0", podSpec.Containers[0].Image) +} + +// TestDaemonSetWrapper tests the DaemonSetWrapper GetPodSpec method +func (s *K8sTestSuite) TestDaemonSetWrapper() { + daemonSet := appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "agent", Image: "prometheus/node-exporter:v1.6.0"}, + }, + }, + }, + }, + } + + wrapper := (*DaemonSetWrapper)(&daemonSet) + podSpec := wrapper.GetPodSpec() + + require.NotNil(s.T(), podSpec) + assert.Len(s.T(), podSpec.Containers, 1) + assert.Equal(s.T(), "prometheus/node-exporter:v1.6.0", podSpec.Containers[0].Image) +} + +// TestCronJobWrapper tests the CronJobWrapper GetPodSpec method +func (s *K8sTestSuite) TestCronJobWrapper() { + cronJob := batchv1.CronJob{ + Spec: batchv1.CronJobSpec{ + Schedule: "*/5 * * * *", + JobTemplate: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "cron-worker", Image: "myapp/cron:latest"}, + }, + RestartPolicy: corev1.RestartPolicyOnFailure, + }, + }, + }, + }, + }, + } + + wrapper := (*CronJobWrapper)(&cronJob) + podSpec := wrapper.GetPodSpec() + + require.NotNil(s.T(), podSpec) + assert.Len(s.T(), podSpec.Containers, 1) + assert.Equal(s.T(), "myapp/cron:latest", podSpec.Containers[0].Image) +} + +// ========== ProcessContainers Tests ========== + +// 
TestProcessContainers tests the processContainers function +func (s *K8sTestSuite) TestProcessContainers() { + ctx := context.Background() + + s.Run("deployment with containers and init containers", func() { + deployment := appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main", Image: "nginx:1.21"}, + {Name: "sidecar", Image: "envoyproxy/envoy:v1.28"}, + }, + InitContainers: []corev1.Container{ + {Name: "init-db", Image: "postgres:15"}, + }, + }, + }, + }, + } + + wrapper := (*DeploymentWrapper)(&deployment) + containersList := &ContainersList{} + err := processContainers(ctx, wrapper, "default", containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 3) + + // Verify all images are processed + images := make([]string, len(containersList.Containers)) + for i, c := range containersList.Containers { + images[i] = c.FullName + } + assert.Contains(s.T(), images, "nginx:1.21") + assert.Contains(s.T(), images, "envoyproxy/envoy:v1.28") + assert.Contains(s.T(), images, "postgres:15") + + // Verify namespace is set + for _, c := range containersList.Containers { + assert.Equal(s.T(), "default", c.ImageNamespace) + } + }) + + s.Run("job with single container", func() { + job := batchv1.Job{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "worker", Image: "myapp/worker:v2.0"}, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + }, + } + + wrapper := (*JobWrapper)(&job) + containersList := &ContainersList{} + err := processContainers(ctx, wrapper, "production", containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "myapp/worker", containersList.Containers[0].Image) + assert.Equal(s.T(), "v2.0", containersList.Containers[0].Tag) + assert.Equal(s.T(), "production", 
containersList.Containers[0].ImageNamespace) + }) + + s.Run("daemonset with multiple containers", func() { + daemonSet := appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "exporter", Image: "prom/node-exporter:v1.6.0"}, + {Name: "collector", Image: "otel/opentelemetry-collector:0.88.0"}, + }, + }, + }, + }, + } + + wrapper := (*DaemonSetWrapper)(&daemonSet) + containersList := &ContainersList{} + err := processContainers(ctx, wrapper, "monitoring", containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 2) + }) + + s.Run("cronjob with container", func() { + cronJob := batchv1.CronJob{ + Spec: batchv1.CronJobSpec{ + Schedule: "0 * * * *", + JobTemplate: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "backup", Image: "restic/restic:0.16.2"}, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + }, + }, + }, + } + + wrapper := (*CronJobWrapper)(&cronJob) + containersList := &ContainersList{} + err := processContainers(ctx, wrapper, "backup", containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "restic/restic", containersList.Containers[0].Image) + }) + + s.Run("deployment with ephemeral containers", func() { + deployment := appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main", Image: "myapp:v1"}, + }, + EphemeralContainers: []corev1.EphemeralContainer{ + { + EphemeralContainerCommon: corev1.EphemeralContainerCommon{ + Name: "debugger", + Image: "busybox:1.36", + }, + }, + }, + }, + }, + }, + } + + wrapper := (*DeploymentWrapper)(&deployment) + containersList := &ContainersList{} + err := processContainers(ctx, wrapper, "default", containersList) + 
+ require.NoError(s.T(), err) + // Should include both main container and ephemeral container + assert.Len(s.T(), containersList.Containers, 2) + }) + + s.Run("context cancellation", func() { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately + + deployment := appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main", Image: "nginx:latest"}, + }, + }, + }, + }, + } + + wrapper := (*DeploymentWrapper)(&deployment) + containersList := &ContainersList{} + err := processContainers(ctx, wrapper, "default", containersList) + + // Should return context error + assert.Error(s.T(), err) + assert.ErrorIs(s.T(), err, context.Canceled) + }) +} + +// TestProcessContainer tests the processContainer function directly +func (s *K8sTestSuite) TestProcessContainer() { + ctx := context.Background() + + s.Run("valid image", func() { + containersList := &ContainersList{} + err := processContainer(ctx, "nginx:1.21", "default", containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "nginx", containersList.Containers[0].Image) + assert.Equal(s.T(), "1.21", containersList.Containers[0].Tag) + assert.Equal(s.T(), "default", containersList.Containers[0].ImageNamespace) + }) + + s.Run("image with sha", func() { + sha := "sha256:abc123def456" + containersList := &ContainersList{} + err := processContainer(ctx, "nginx:1.21@"+sha, "production", containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "nginx", containersList.Containers[0].Image) + assert.Equal(s.T(), "1.21", containersList.Containers[0].Tag) + assert.Equal(s.T(), sha, containersList.Containers[0].Sha) + }) + + s.Run("context already cancelled", func() { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + containersList := &ContainersList{} + 
err := processContainer(ctx, "nginx:latest", "default", containersList) + + assert.Error(s.T(), err) + assert.ErrorIs(s.T(), err, context.Canceled) + }) + + s.Run("invalid image name", func() { + containersList := &ContainersList{} + err := processContainer(ctx, "@@@invalid@@@", "default", containersList) + + assert.Error(s.T(), err) + }) +} + +// TestContainerCache tests the container cache functionality +func (s *K8sTestSuite) TestContainerCache() { + // Clear cache first + containerCache.Lock() + containerCache.cache = make(map[string]Container) + containerCache.Unlock() + + s.Run("cache miss then hit", func() { + // First call - cache miss + result1, err := ProcessContainerName("redis:7.0") + require.NoError(s.T(), err) + assert.Equal(s.T(), "redis", result1.Image) + + // Second call - should hit cache + result2, err := ProcessContainerName("redis:7.0") + require.NoError(s.T(), err) + assert.Equal(s.T(), result1, result2) + }) + + s.Run("get from empty cache", func() { + _, found := containerCache.Get("nonexistent:tag") + assert.False(s.T(), found) + }) + + s.Run("set and get from cache", func() { + testContainer := Container{ + Image: "test-image", + Tag: "test-tag", + FullName: "test-image:test-tag", + } + containerCache.Set("test-key", testContainer) + + retrieved, found := containerCache.Get("test-key") + assert.True(s.T(), found) + assert.Equal(s.T(), testContainer, retrieved) + }) +} + +// TestConcurrentProcessing tests concurrent container processing +func (s *K8sTestSuite) TestConcurrentProcessing() { + ctx := context.Background() + + s.Run("concurrent processContainer calls", func() { + images := []string{ + "nginx:1.21", + "redis:7.0", + "postgres:15", + "mysql:8.0", + "mongodb:6.0", + } + + results := make(chan Container, len(images)) + errors := make(chan error, len(images)) + + for _, img := range images { + go func(image string) { + containersList := &ContainersList{} + err := processContainer(ctx, image, "default", containersList) + if err != nil 
{ + errors <- err + return + } + if len(containersList.Containers) > 0 { + results <- containersList.Containers[0] + } + }(img) + } + + // Collect results + collected := 0 + for collected < len(images) { + select { + case <-results: + collected++ + case err := <-errors: + s.T().Errorf("Unexpected error: %v", err) + collected++ + case <-time.After(5 * time.Second): + s.T().Fatal("Timeout waiting for concurrent processing") + } + } + + assert.Equal(s.T(), len(images), collected) + }) +} + +// Helper to create a fake client +func (s *K8sTestSuite) newFakeClient(objs ...client.Object) client.Client { + scheme := runtime.NewScheme() + _ = appsv1.AddToScheme(scheme) + _ = batchv1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + return fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objs...). + Build() +} + +// TestListAndProcessResources tests the ListAndProcessResources function +func (s *K8sTestSuite) TestListAndProcessResources() { + ctx := context.Background() + + s.Run("process deployments", func() { + deployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-deployment", + Namespace: "default", + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "test"}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "test"}}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main", Image: "nginx:1.21"}, + {Name: "sidecar", Image: "envoy:v1.28"}, + }, + }, + }, + }, + } + + fakeClient := s.newFakeClient(deployment) + containersList := &ContainersList{} + err := ListAndProcessResources[*DeploymentWrapper](ctx, fakeClient, &appsv1.DeploymentList{}, containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 2) + + // Verify images + images := make(map[string]bool) + for _, c := range containersList.Containers { + images[c.FullName] = true + } + assert.True(s.T(), 
images["nginx:1.21"]) + assert.True(s.T(), images["envoy:v1.28"]) + }) + + s.Run("process jobs", func() { + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-job", + Namespace: "default", + }, + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "worker", Image: "busybox:1.36"}, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + }, + } + + fakeClient := s.newFakeClient(job) + containersList := &ContainersList{} + err := ListAndProcessResources[*JobWrapper](ctx, fakeClient, &batchv1.JobList{}, containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "busybox", containersList.Containers[0].Image) + }) + + s.Run("process daemonsets", func() { + daemonSet := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ds", + Namespace: "monitoring", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "exporter"}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "exporter"}}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "exporter", Image: "prom/node-exporter:v1.6.0"}, + }, + }, + }, + }, + } + + fakeClient := s.newFakeClient(daemonSet) + containersList := &ContainersList{} + err := ListAndProcessResources[*DaemonSetWrapper](ctx, fakeClient, &appsv1.DaemonSetList{}, containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "monitoring", containersList.Containers[0].ImageNamespace) + }) + + s.Run("process cronjobs", func() { + cronJob := &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cron", + Namespace: "batch", + }, + Spec: batchv1.CronJobSpec{ + Schedule: "0 * * * *", + JobTemplate: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: 
corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "cron", Image: "alpine:3.18"}, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + }, + }, + }, + } + + fakeClient := s.newFakeClient(cronJob) + containersList := &ContainersList{} + err := ListAndProcessResources[*CronJobWrapper](ctx, fakeClient, &batchv1.CronJobList{}, containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 1) + assert.Equal(s.T(), "alpine", containersList.Containers[0].Image) + }) + + s.Run("process multiple resources", func() { + deployment1 := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "deploy-1", Namespace: "ns1"}, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "app1"}}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "app1"}}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "main", Image: "app1:v1"}}, + }, + }, + }, + } + deployment2 := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "deploy-2", Namespace: "ns2"}, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "app2"}}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "app2"}}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "main", Image: "app2:v1"}}, + }, + }, + }, + } + + fakeClient := s.newFakeClient(deployment1, deployment2) + containersList := &ContainersList{} + err := ListAndProcessResources[*DeploymentWrapper](ctx, fakeClient, &appsv1.DeploymentList{}, containersList) + + require.NoError(s.T(), err) + assert.Len(s.T(), containersList.Containers, 2) + }) + + s.Run("empty list", func() { + fakeClient := s.newFakeClient() + containersList := &ContainersList{} + err := ListAndProcessResources[*DeploymentWrapper](ctx, fakeClient, &appsv1.DeploymentList{}, containersList) + + require.NoError(s.T(), err) + 
assert.Len(s.T(), containersList.Containers, 0) + }) + + s.Run("context cancellation", func() { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + deployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "test"}}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "test"}}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "main", Image: "test:v1"}}, + }, + }, + }, + } + + fakeClient := s.newFakeClient(deployment) + containersList := &ContainersList{} + err := ListAndProcessResources[*DeploymentWrapper](ctx, fakeClient, &appsv1.DeploymentList{}, containersList) + + // Context cancellation should result in an error + assert.Error(s.T(), err) + }) + + s.Run("unsupported list type", func() { + fakeClient := s.newFakeClient() + containersList := &ContainersList{} + // Pass an unsupported list type (corev1.PodList is not handled) + err := ListAndProcessResources[*DeploymentWrapper](ctx, fakeClient, &corev1.PodList{}, containersList) + + assert.Error(s.T(), err) + assert.Contains(s.T(), err.Error(), "unsupported list type") + }) +}