From 39dda275f1d0afd36858fdd8a9d86ce33a44316b Mon Sep 17 00:00:00 2001
From: Lukas Piwowarski
Date: Thu, 12 Dec 2024 11:57:41 -0500
Subject: [PATCH 1/2] Move away from Jobs to Pods

The test-operator is using Jobs to spawn test pods even though it does
not use any features of this k8s object. In addition, the usage of Jobs
requires the creation of a ServiceAccount in the target namespaces. In
order to be able to create a new SA, the test-operator has to have the
rights to create new roles and rolebindings, which in our case makes
the attack surface larger.

This patch drops the usage of Jobs and moves to Pods.

Depends-On: https://github.com/openstack-k8s-operators/ci-framework/pull/2604
---
 controllers/ansibletest_controller.go |  25 ++----
 controllers/common.go                 | 117 +++++++++++++++++---------
 controllers/horizontest_controller.go |  23 ++---
 controllers/tempest_controller.go     |  25 ++----
 controllers/tobiko_controller.go      |  25 ++----
 pkg/ansibletest/job.go                |  74 +++++++---------
 pkg/horizontest/job.go                |  72 +++++++---------
 pkg/tempest/job.go                    |  98 ++++++++++-----------
 pkg/tobiko/job.go                     |  80 +++++++-----------
 9 files changed, 243 insertions(+), 296 deletions(-)

diff --git a/controllers/ansibletest_controller.go b/controllers/ansibletest_controller.go
index c31ccc5a..9409c4fa 100644
--- a/controllers/ansibletest_controller.go
+++ b/controllers/ansibletest_controller.go
@@ -21,7 +21,6 @@ import (
 	"errors"
 	"fmt"
 	"strconv"
-	"time"
 
 	"reflect"
 
@@ -30,12 +29,10 @@ import (
 	"github.com/openstack-k8s-operators/lib-common/modules/common/condition"
 	"github.com/openstack-k8s-operators/lib-common/modules/common/env"
 	"github.com/openstack-k8s-operators/lib-common/modules/common/helper"
-	"github.com/openstack-k8s-operators/lib-common/modules/common/job"
 	common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac"
-	"github.com/openstack-k8s-operators/test-operator/api/v1beta1"
 	testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
+	v1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
 	"github.com/openstack-k8s-operators/test-operator/pkg/ansibletest"
-	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	k8s_errors "k8s.io/apimachinery/pkg/api/errors"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -161,7 +158,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		Log.Info(InfoTestingCompleted)
 		return ctrl.Result{}, nil
 
-	case CreateFirstJob:
+	case CreateFirstPod:
 		lockAcquired, err := r.AcquireLock(ctx, instance, helper, false)
 		if !lockAcquired {
 			Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName))
@@ -170,7 +167,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 		Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep))
 
-	case CreateNextJob:
+	case CreateNextPod:
 		// Confirm that we still hold the lock. This is useful to check if for
 		// example somebody / something deleted the lock and it got claimed by
 		// another instance. This is considered to be an error state.
@@ -213,7 +210,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Create a new job mountCerts := r.CheckSecretExists(ctx, instance, "combined-ca-bundle") - jobName := r.GetJobName(instance, nextWorkflowStep) + jobName := r.GetPodName(instance, nextWorkflowStep) envVars, workflowOverrideParams := r.PrepareAnsibleEnv(instance, nextWorkflowStep) logsPVCName := r.GetPVCLogsName(instance, 0) containerImage, err := r.GetContainerImage(ctx, workflowOverrideParams["ContainerImage"], instance) @@ -245,8 +242,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) return rbacResult, nil } // Service account, role, binding - end - - jobDef := ansibletest.Job( + podDef := ansibletest.Pod( instance, serviceLabels, jobName, @@ -258,15 +254,8 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) containerImage, privileged, ) - ansibleTestsJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = ansibleTestsJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { // Creation of the ansibleTests job was not successfull. // Release the lock and allow other controllers to spawn @@ -299,7 +288,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) func (r *AnsibleTestReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.AnsibleTest{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). Complete(r) diff --git a/controllers/common.go b/controllers/common.go index 4abed53b..342de2a0 100644 --- a/controllers/common.go +++ b/controllers/common.go @@ -17,7 +17,6 @@ import ( "github.com/openstack-k8s-operators/lib-common/modules/common/util" v1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "gopkg.in/yaml.v3" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" @@ -27,6 +26,7 @@ import ( "k8s.io/client-go/kubernetes" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) const ( @@ -80,13 +80,13 @@ const ( // to change Wait = iota - // CreateFirstJob indicates that the Reconcile loop should create the first job + // CreateFirstPod indicates that the Reconcile loop should create the first job // either specified in the .Spec section or in the .Spec.Workflow section. - CreateFirstJob + CreateFirstPod - // CreateNextJob indicates that the Reconcile loop should create a next job + // CreateNextPod indicates that the Reconcile loop should create a next job // specified in the .Spec.Workflow section (if .Spec.Workflow is defined) - CreateNextJob + CreateNextPod // EndTesting indicates that all jobs have already finished. The Reconcile // loop should end the testing and release resources that are required to @@ -97,6 +97,47 @@ const ( Failure ) +// GetPod returns pod that has a specific name (podName) in a given namespace +// (podNamespace). 
+func (r *Reconciler) GetPod( + ctx context.Context, + podName string, + podNamespace string, +) (*corev1.Pod, error) { + pod := &corev1.Pod{} + objectKey := client.ObjectKey{Namespace: podNamespace, Name: podName} + if err := r.Client.Get(ctx, objectKey, pod); err != nil { + return pod, err + } + + return pod, nil +} + +// CreatePod creates a pod based on a spec provided via PodSpec. +func (r *Reconciler) CreatePod( + ctx context.Context, + h helper.Helper, + podSpec *corev1.Pod, +) (ctrl.Result, error) { + _, err := r.GetPod(ctx, podSpec.Name, podSpec.Namespace) + if err == nil { + return ctrl.Result{}, nil + } else if !k8s_errors.IsNotFound(err) { + return ctrl.Result{}, err + } + + err = controllerutil.SetControllerReference(h.GetBeforeObject(), podSpec, r.GetScheme()) + if err != nil { + return ctrl.Result{}, err + } + + if err := r.Client.Create(ctx, podSpec); err != nil { + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + // NextAction indicates what action needs to be performed by the Reconcile loop // based on the current state of the OpenShift cluster. func (r *Reconciler) NextAction( @@ -104,52 +145,52 @@ func (r *Reconciler) NextAction( instance client.Object, workflowLength int, ) (NextAction, int, error) { - // Get the latest job. The latest job is job with the highest value stored + // Get the latest pod. The latest pod is pod with the highest value stored // in workflowStep label workflowStepIdx := 0 - lastJob, err := r.GetLastJob(ctx, instance) + lastPod, err := r.GetLastPod(ctx, instance) if err != nil { return Failure, workflowStepIdx, err } - // If there is a job associated with the current instance. - if lastJob != nil { - workflowStepIdx, err := strconv.Atoi(lastJob.Labels[workflowStepLabel]) + // If there is a pod associated with the current instance. 
+ if lastPod != nil { + workflowStepIdx, err := strconv.Atoi(lastPod.Labels[workflowStepLabel]) if err != nil { return Failure, workflowStepIdx, err } - // If the last job is not in Failed or Succeded state -> Wait - lastJobFinished := (lastJob.Status.Failed + lastJob.Status.Succeeded) > 0 - if !lastJobFinished { + // If the last pod is not in Failed or Succeded state -> Wait + lastPodFinished := lastPod.Status.Phase == corev1.PodFailed || lastPod.Status.Phase == corev1.PodSucceeded + if !lastPodFinished { return Wait, workflowStepIdx, nil } - // If the last job is in Failed or Succeeded state and it is NOT the last - // job which was supposed to be created -> CreateNextJob - if lastJobFinished && !isLastJobIndex(workflowStepIdx, workflowLength) { + // If the last pod is in Failed or Succeeded state and it is NOT the last + // pod which was supposed to be created -> CreateNextPod + if lastPodFinished && !isLastPodIndex(workflowStepIdx, workflowLength) { workflowStepIdx++ - return CreateNextJob, workflowStepIdx, nil + return CreateNextPod, workflowStepIdx, nil } - // Otherwise if the job is in Failed or Succeded stated and it IS the - // last job -> EndTesting - if lastJobFinished && isLastJobIndex(workflowStepIdx, workflowLength) { + // Otherwise if the pod is in Failed or Succeded stated and it IS the + // last pod -> EndTesting + if lastPodFinished && isLastPodIndex(workflowStepIdx, workflowLength) { return EndTesting, workflowStepIdx, nil } } - // If there is not any job associated with the instance -> createFirstJob - if lastJob == nil { - return CreateFirstJob, workflowStepIdx, nil + // If there is not any pod associated with the instance -> createFirstPod + if lastPod == nil { + return CreateFirstPod, workflowStepIdx, nil } return Failure, workflowStepIdx, nil } -// isLastJobIndex returns true when jobIndex is the index of the last job that +// isLastPodIndex returns true when jobIndex is the index of the last job that // should be executed. Otherwise the return value is false. 
-func isLastJobIndex(jobIndex int, workflowLength int) bool { +func isLastPodIndex(jobIndex int, workflowLength int) bool { switch workflowLength { case 0: return jobIndex == workflowLength @@ -160,26 +201,26 @@ func isLastJobIndex(jobIndex int, workflowLength int) bool { // GetLastJob returns job associated with an instance which has the highest value // stored in the workflowStep label -func (r *Reconciler) GetLastJob( +func (r *Reconciler) GetLastPod( ctx context.Context, instance client.Object, -) (*batchv1.Job, error) { +) (*corev1.Pod, error) { labels := map[string]string{instanceNameLabel: instance.GetName()} namespaceListOpt := client.InNamespace(instance.GetNamespace()) labelsListOpt := client.MatchingLabels(labels) - jobList := &batchv1.JobList{} - err := r.Client.List(ctx, jobList, namespaceListOpt, labelsListOpt) + podList := &corev1.PodList{} + err := r.Client.List(ctx, podList, namespaceListOpt, labelsListOpt) if err != nil { return nil, err } - var maxJob *batchv1.Job + var maxJob *corev1.Pod maxJobWorkflowStep := 0 - for _, job := range jobList.Items { + for _, job := range podList.Items { workflowStep, err := strconv.Atoi(job.Labels[workflowStepLabel]) if err != nil { - return &batchv1.Job{}, err + return &corev1.Pod{}, err } if workflowStep >= maxJobWorkflowStep { @@ -307,7 +348,7 @@ func (r *Reconciler) GetContainerImage( return "", nil } -func (r *Reconciler) GetJobName(instance interface{}, workflowStepNum int) string { +func (r *Reconciler) GetPodName(instance interface{}, workflowStepNum int) string { if typedInstance, ok := instance.(*v1beta1.Tobiko); ok { if len(typedInstance.Spec.Workflow) == 0 || workflowStepNum == workflowStepNumInvalid { return typedInstance.Name @@ -552,11 +593,11 @@ func (r *Reconciler) ReleaseLock(ctx context.Context, instance client.Object) (b return false, errors.New("failed to delete test-operator-lock") } -func (r *Reconciler) JobExists(ctx context.Context, instance client.Object, workflowStepNum int) bool { - job := &batchv1.Job{} - jobName := r.GetJobName(instance, workflowStepNum) - objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: jobName} - err := r.Client.Get(ctx, objectKey, job) +func (r *Reconciler) PodExists(ctx context.Context, instance client.Object, workflowStepNum int) bool { + pod := &corev1.Pod{} + podName := r.GetPodName(instance, workflowStepNum) + objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: podName} + err := r.Client.Get(ctx, objectKey, pod) if err != nil && k8s_errors.IsNotFound(err) { return false } diff --git a/controllers/horizontest_controller.go b/controllers/horizontest_controller.go index 62b8e021..5b8a320a 100644 --- a/controllers/horizontest_controller.go +++ b/controllers/horizontest_controller.go @@ -20,18 +20,15 @@ import ( "context" "errors" "fmt" - "time" "github.com/go-logr/logr" "github.com/openstack-k8s-operators/lib-common/modules/common" "github.com/openstack-k8s-operators/lib-common/modules/common/condition" "github.com/openstack-k8s-operators/lib-common/modules/common/env" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - "github.com/openstack-k8s-operators/lib-common/modules/common/job" common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "github.com/openstack-k8s-operators/test-operator/pkg/horizontest" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" ctrl 
"sigs.k8s.io/controller-runtime" @@ -154,7 +151,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) Log.Info(InfoTestingCompleted) return ctrl.Result{}, nil - case CreateFirstJob: + case CreateFirstPod: lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel) if !lockAcquired { Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName)) @@ -163,7 +160,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep)) - case CreateNextJob: + case CreateNextPod: // Confirm that we still hold the lock. This is useful to check if for // example somebody / something deleted the lock and it got claimed by // another instance. This is considered to be an error state. @@ -224,7 +221,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Prepare HorizonTest env vars envVars := r.PrepareHorizonTestEnvVars(instance) - jobName := r.GetJobName(instance, 0) + jobName := r.GetPodName(instance, 0) logsPVCName := r.GetPVCLogsName(instance, 0) containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance) if err != nil { @@ -240,8 +237,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) return rbacResult, nil } // Service account, role, binding - end - - jobDef := horizontest.Job( + podDef := horizontest.Pod( instance, serviceLabels, jobName, @@ -252,15 +248,8 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) envVars, containerImage, ) - horizontestJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = horizontestJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { instance.Status.Conditions.Set(condition.FalseCondition( condition.DeploymentReadyCondition, @@ -286,7 +275,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) func (r *HorizonTestReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.HorizonTest{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). 
Complete(r) diff --git a/controllers/tempest_controller.go b/controllers/tempest_controller.go index d82ff242..c30fe123 100644 --- a/controllers/tempest_controller.go +++ b/controllers/tempest_controller.go @@ -29,14 +29,12 @@ import ( "github.com/openstack-k8s-operators/lib-common/modules/common/condition" "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - "github.com/openstack-k8s-operators/lib-common/modules/common/job" "github.com/openstack-k8s-operators/lib-common/modules/common/labels" nad "github.com/openstack-k8s-operators/lib-common/modules/common/networkattachment" common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" "github.com/openstack-k8s-operators/lib-common/modules/common/util" testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "github.com/openstack-k8s-operators/test-operator/pkg/tempest" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" @@ -178,7 +176,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re Log.Info(InfoTestingCompleted) return ctrl.Result{}, nil - case CreateFirstJob: + case CreateFirstPod: lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel) if !lockAcquired { Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName)) @@ -187,7 +185,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep)) - case CreateNextJob: + case CreateNextPod: // Confirm that we still hold the lock. This is useful to check if for // example somebody / something deleted the lock and it got claimed by // another instance. This is considered to be an error state. @@ -287,7 +285,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re } // NetworkAttachments - if r.JobExists(ctx, instance, nextWorkflowStep) { + if r.PodExists(ctx, instance, nextWorkflowStep) { networkReady, networkAttachmentStatus, err := nad.VerifyNetworkStatusFromAnnotation( ctx, helper, @@ -323,7 +321,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re mountCerts := r.CheckSecretExists(ctx, instance, "combined-ca-bundle") customDataConfigMapName := GetCustomDataConfigMapName(instance, nextWorkflowStep) EnvVarsConfigMapName := GetEnvVarsConfigMapName(instance, nextWorkflowStep) - jobName := r.GetJobName(instance, nextWorkflowStep) + jobName := r.GetPodName(instance, nextWorkflowStep) logsPVCName := r.GetPVCLogsName(instance, workflowStepNum) containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance) if err != nil { @@ -356,7 +354,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re } } - jobDef := tempest.Job( + podDef := tempest.Pod( instance, serviceLabels, serviceAnnotations, @@ -368,15 +366,8 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re mountSSHKey, containerImage, ) - tempestJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = tempestJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { // Creation of the tempest job was not successfull. 
// Release the lock and allow other controllers to spawn @@ -425,7 +416,7 @@ func (r *TempestReconciler) reconcileDelete( func (r *TempestReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.Tempest{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). Complete(r) @@ -504,7 +495,7 @@ func (r *TempestReconciler) setTempestConfigVars(envVars map[string]string, envVars["TEMPEST_EXTERNAL_PLUGIN_REFSPEC"] += externalPluginDictionary.ChangeRefspec + "," } - envVars["TEMPEST_WORKFLOW_STEP_DIR_NAME"] = r.GetJobName(instance, workflowStepNum) + envVars["TEMPEST_WORKFLOW_STEP_DIR_NAME"] = r.GetPodName(instance, workflowStepNum) extraImages := mergeWithWorkflow(tRun.ExtraImages, wtRun.ExtraImages) for _, extraImageDict := range extraImages { diff --git a/controllers/tobiko_controller.go b/controllers/tobiko_controller.go index ee602985..e50d5087 100644 --- a/controllers/tobiko_controller.go +++ b/controllers/tobiko_controller.go @@ -30,13 +30,11 @@ import ( "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" "github.com/openstack-k8s-operators/lib-common/modules/common/env" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - "github.com/openstack-k8s-operators/lib-common/modules/common/job" nad "github.com/openstack-k8s-operators/lib-common/modules/common/networkattachment" common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" "github.com/openstack-k8s-operators/lib-common/modules/common/util" testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "github.com/openstack-k8s-operators/test-operator/pkg/tobiko" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" @@ -165,7 +163,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res Log.Info(InfoTestingCompleted) return ctrl.Result{}, nil - case CreateFirstJob: + case CreateFirstPod: lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel) if !lockAcquired { Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName)) @@ -174,7 +172,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep)) - case CreateNextJob: + case CreateNextPod: // Confirm that we still hold the lock. This needs to be checked in order // to prevent situation when somebody / something deleted the lock and it // got claimedy by another instance. 
@@ -261,7 +259,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res } // NetworkAttachments - if r.JobExists(ctx, instance, nextWorkflowStep) { + if r.PodExists(ctx, instance, nextWorkflowStep) { networkReady, networkAttachmentStatus, err := nad.VerifyNetworkStatusFromAnnotation( ctx, helper, @@ -310,7 +308,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res // Prepare Tobiko env vars envVars := r.PrepareTobikoEnvVars(ctx, serviceLabels, instance, helper, nextWorkflowStep) - jobName := r.GetJobName(instance, nextWorkflowStep) + jobName := r.GetPodName(instance, nextWorkflowStep) logsPVCName := r.GetPVCLogsName(instance, workflowStepNum) containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance) privileged := r.OverwriteValueWithWorkflow(instance.Spec, "Privileged", "pbool", nextWorkflowStep).(bool) @@ -328,7 +326,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res } // Service account, role, binding - end - jobDef := tobiko.Job( + podDef := tobiko.Job( instance, serviceLabels, serviceAnnotations, @@ -341,15 +339,8 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res containerImage, privileged, ) - tobikoJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = tobikoJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { instance.Status.Conditions.Set(condition.FalseCondition( condition.DeploymentReadyCondition, @@ -375,7 +366,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res func (r *TobikoReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.Tobiko{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). 
Complete(r) @@ -410,7 +401,7 @@ func (r *TobikoReconciler) PrepareTobikoEnvVars( // Prepare env vars envVars := make(map[string]env.Setter) envVars["USE_EXTERNAL_FILES"] = env.SetValue("True") - envVars["TOBIKO_LOGS_DIR_NAME"] = env.SetValue(r.GetJobName(instance, step)) + envVars["TOBIKO_LOGS_DIR_NAME"] = env.SetValue(r.GetPodName(instance, step)) testenv := r.OverwriteValueWithWorkflow(instance.Spec, "Testenv", "string", step).(string) envVars["TOBIKO_TESTENV"] = env.SetValue(testenv) diff --git a/pkg/ansibletest/job.go b/pkg/ansibletest/job.go index b88e9aab..ab06f32b 100644 --- a/pkg/ansibletest/job.go +++ b/pkg/ansibletest/job.go @@ -5,13 +5,12 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Job - prepare job to run AnsibleTests tests -func Job( +// Pod - prepare job to run AnsibleTests tests +func Pod( instance *testv1beta1.AnsibleTest, labels map[string]string, jobName string, @@ -22,67 +21,54 @@ func Job( externalWorkflowCounter int, containerImage string, privileged bool, -) *batchv1.Job { +) *corev1.Pod { runAsUser := int64(227) runAsGroup := int64(227) - parallelism := int32(1) - completions := int32(1) capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"} securityContext := util.GetSecurityContext(runAsUser, capabilities, privileged) - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, Namespace: instance.Namespace, Labels: labels, }, - Spec: batchv1.JobSpec{ - Parallelism: ¶llelism, - Completions: &completions, - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - }, - Spec: corev1.PodSpec{ - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ - { - Name: instance.Name, - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, instance, externalWorkflowCounter), - SecurityContext: &securityContext, - }, - }, - Volumes: GetVolumes( - instance, - logsPVCName, - mountCerts, - workflowOverrideParams, - externalWorkflowCounter, - ), + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name, + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, instance, externalWorkflowCounter), + SecurityContext: &securityContext, }, }, + Volumes: GetVolumes( + instance, + logsPVCName, + mountCerts, + workflowOverrideParams, + externalWorkflowCounter, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } diff --git 
a/pkg/horizontest/job.go b/pkg/horizontest/job.go index 5a9123d5..6a166d13 100644 --- a/pkg/horizontest/job.go +++ b/pkg/horizontest/job.go @@ -5,13 +5,12 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Job - prepare job to run Horizon tests -func Job( +// Pod - prepare job to run Horizon tests +func Pod( instance *testv1beta1.HorizonTest, labels map[string]string, jobName string, @@ -21,66 +20,53 @@ func Job( mountKubeconfig bool, envVars map[string]env.Setter, containerImage string, -) *batchv1.Job { +) *corev1.Pod { runAsUser := int64(42455) runAsGroup := int64(42455) - parallelism := int32(1) - completions := int32(1) capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"} securityContext := util.GetSecurityContext(runAsUser, capabilities, instance.Spec.Privileged) - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, Namespace: instance.Namespace, Labels: labels, }, - Spec: batchv1.JobSpec{ - Parallelism: ¶llelism, - Completions: &completions, - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - }, - Spec: corev1.PodSpec{ - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ - { - Name: instance.Name, - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), - SecurityContext: &securityContext, - }, - }, - Volumes: GetVolumes( - instance, - logsPVCName, - mountCerts, - mountKubeconfig, - ), + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name, + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), + SecurityContext: &securityContext, }, }, + Volumes: GetVolumes( + instance, + logsPVCName, + mountCerts, + mountKubeconfig, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } diff --git a/pkg/tempest/job.go b/pkg/tempest/job.go index 6e73a112..c7c7c052 100644 --- a/pkg/tempest/job.go +++ b/pkg/tempest/job.go @@ -5,13 +5,12 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Job - prepare job to run Tempest tests -func Job( +// Pod - prepare pod to run Tempest tests +func Pod( instance *testv1beta1.Tempest, labels map[string]string, 
annotations map[string]string, @@ -22,79 +21,70 @@ func Job( mountCerts bool, mountSSHKey bool, containerImage string, -) *batchv1.Job { +) *corev1.Pod { envVars := map[string]env.Setter{} runAsUser := int64(42480) runAsGroup := int64(42480) securityContext := util.GetSecurityContext(runAsUser, []corev1.Capability{}, instance.Spec.Privileged) - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: jobName, - Namespace: instance.Namespace, - Labels: labels, + Annotations: annotations, + Name: jobName, + Namespace: instance.Namespace, + Labels: labels, }, - Spec: batchv1.JobSpec{ - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: annotations, - Labels: labels, - }, - Spec: corev1.PodSpec{ - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name + "-tests-runner", + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, mountSSHKey, instance), + SecurityContext: &securityContext, + EnvFrom: []corev1.EnvFromSource{ { - Name: instance.Name + "-tests-runner", - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, mountSSHKey, instance), - SecurityContext: &securityContext, - EnvFrom: []corev1.EnvFromSource{ - { - ConfigMapRef: &corev1.ConfigMapEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: customDataConfigMapName, - }, - }, + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: customDataConfigMapName, }, - { - ConfigMapRef: &corev1.ConfigMapEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: envVarsConfigMapName, - }, - }, + }, + }, + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: envVarsConfigMapName, }, }, }, }, - Volumes: GetVolumes( - instance, - customDataConfigMapName, - logsPVCName, - mountCerts, - mountSSHKey, - ), }, }, + Volumes: GetVolumes( + instance, + customDataConfigMapName, + logsPVCName, + mountCerts, + mountSSHKey, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } diff --git a/pkg/tobiko/job.go b/pkg/tobiko/job.go index 21c2f971..b8b51344 100644 --- a/pkg/tobiko/job.go +++ b/pkg/tobiko/job.go @@ -5,7 +5,6 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -23,70 +22,55 @@ func Job( envVars map[string]env.Setter, 
containerImage string, privileged bool, -) *batchv1.Job { +) *corev1.Pod { runAsUser := int64(42495) runAsGroup := int64(42495) - parallelism := int32(1) - completions := int32(1) capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"} securityContext := util.GetSecurityContext(runAsUser, capabilities, privileged) - // Note(lpiwowar): Once the webhook is implemented move all the logic of merging - // the workflows there. - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: jobName, - Namespace: instance.Namespace, - Labels: labels, + Annotations: annotations, + Name: jobName, + Namespace: instance.Namespace, + Labels: labels, }, - Spec: batchv1.JobSpec{ - Parallelism: ¶llelism, - Completions: &completions, - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: annotations, - Labels: labels, - }, - Spec: corev1.PodSpec{ - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ - { - Name: instance.Name, - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), - SecurityContext: &securityContext, - }, - }, - Volumes: GetVolumes( - instance, - logsPVCName, - mountCerts, - mountKeys, - mountKubeconfig, - ), + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name, + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), + SecurityContext: &securityContext, }, }, + Volumes: GetVolumes( + instance, + logsPVCName, + mountCerts, + mountKeys, + mountKubeconfig, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } From cd18a0ba547c21b4500f9bca24374a4a65dfd3ce Mon Sep 17 00:00:00 2001 From: Lukas Piwowarski Date: Thu, 12 Dec 2024 12:02:17 -0500 Subject: [PATCH 2/2] Rename job.go to pod.go We dropped the usage of Jobs in test-operator. Let's rename the file to reflect this change. 
---
 pkg/ansibletest/{job.go => pod.go} | 0
 pkg/horizontest/{job.go => pod.go} | 0
 pkg/tempest/{job.go => pod.go}     | 0
 pkg/tobiko/{job.go => pod.go}      | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename pkg/ansibletest/{job.go => pod.go} (100%)
 rename pkg/horizontest/{job.go => pod.go} (100%)
 rename pkg/tempest/{job.go => pod.go} (100%)
 rename pkg/tobiko/{job.go => pod.go} (100%)

diff --git a/pkg/ansibletest/job.go b/pkg/ansibletest/pod.go
similarity index 100%
rename from pkg/ansibletest/job.go
rename to pkg/ansibletest/pod.go
diff --git a/pkg/horizontest/job.go b/pkg/horizontest/pod.go
similarity index 100%
rename from pkg/horizontest/job.go
rename to pkg/horizontest/pod.go
diff --git a/pkg/tempest/job.go b/pkg/tempest/pod.go
similarity index 100%
rename from pkg/tempest/job.go
rename to pkg/tempest/pod.go
diff --git a/pkg/tobiko/job.go b/pkg/tobiko/pod.go
similarity index 100%
rename from pkg/tobiko/job.go
rename to pkg/tobiko/pod.go
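
Illustrative note (not part of the patches above): a minimal sketch of the
Pod-creation pattern this series adopts, i.e. create the Pod directly and
tie it to the custom resource through an owner reference instead of
delegating to a batchv1.Job. The names ensureTestPod, c, scheme and owner
below are hypothetical and do not exist in the test-operator code; the
helpers actually added by this series are Reconciler.GetPod and
Reconciler.CreatePod in controllers/common.go.

package example

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// ensureTestPod creates podDef unless a Pod with the same name already
// exists in its namespace. The created Pod is owned by the custom
// resource, so a watch set up via Owns(&corev1.Pod{}) re-triggers
// reconciliation when the Pod phase changes, and the Pod is garbage
// collected together with the custom resource.
func ensureTestPod(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	owner client.Object,
	podDef *corev1.Pod,
) error {
	existing := &corev1.Pod{}
	key := client.ObjectKey{Namespace: podDef.Namespace, Name: podDef.Name}
	err := c.Get(ctx, key, existing)
	if err == nil {
		// The Pod already exists; nothing to do.
		return nil
	}
	if !k8serrors.IsNotFound(err) {
		return err
	}

	// Make the custom resource the controller owner of the Pod.
	if err := controllerutil.SetControllerReference(owner, podDef, scheme); err != nil {
		return err
	}
	return c.Create(ctx, podDef)
}

Unlike the previous job.NewJob(...).DoJob(...) flow, the reconcilers now
read the Pod phase directly (corev1.PodSucceeded / corev1.PodFailed) in
NextAction to decide whether to wait, create the next workflow step, or
end testing.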