Use ansibleEE library instead of AnsibleEE v1 CR #972

Merged
12 changes: 0 additions & 12 deletions config/rbac/role.yaml
@@ -82,18 +82,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - ansibleee.openstack.org
-  resources:
-  - openstackansibleees
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
 - apiGroups:
   - barbican.openstack.org
   resources:
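With the ansibleee.openstack.org rule removed, the operator still needs RBAC on batch/v1 Jobs, since the controllers below now create Jobs directly. That grant is outside this diff; purely as an illustration, a kubebuilder marker generating an equivalent rule would look like the following (its placement in the controller source is an assumption, not shown by this PR):

// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete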
5 changes: 2 additions & 3 deletions controllers/dataplane/openstackdataplanedeployment_controller.go
@@ -22,6 +22,7 @@ import (
"time"

"github.com/go-playground/validator/v10"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
@@ -40,7 +41,6 @@ import (
condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition"
"github.com/openstack-k8s-operators/lib-common/modules/common/helper"
"github.com/openstack-k8s-operators/lib-common/modules/common/util"
ansibleeev1 "github.com/openstack-k8s-operators/openstack-ansibleee-operator/api/v1beta1"
dataplanev1 "github.com/openstack-k8s-operators/openstack-operator/apis/dataplane/v1beta1"
deployment "github.com/openstack-k8s-operators/openstack-operator/pkg/dataplane"
dataplaneutil "github.com/openstack-k8s-operators/openstack-operator/pkg/dataplane/util"
@@ -63,7 +63,6 @@ func (r *OpenStackDataPlaneDeploymentReconciler) GetLogger(ctx context.Context)
 // +kubebuilder:rbac:groups=dataplane.openstack.org,resources=openstackdataplanedeployments/finalizers,verbs=update;patch
 // +kubebuilder:rbac:groups=dataplane.openstack.org,resources=openstackdataplanenodesets,verbs=get;list;watch
 // +kubebuilder:rbac:groups=dataplane.openstack.org,resources=openstackdataplaneservices,verbs=get;list;watch
-// +kubebuilder:rbac:groups=ansibleee.openstack.org,resources=openstackansibleees,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=discovery.k8s.io,resources=endpointslices,verbs=get;list;watch;create;update;patch;delete;
 // +kubebuilder:rbac:groups=cert-manager.io,resources=issuers,verbs=get;list;watch;
 // +kubebuilder:rbac:groups=cert-manager.io,resources=certificates,verbs=get;list;watch;create;update;patch;delete;
@@ -494,7 +493,7 @@ func (r *OpenStackDataPlaneDeploymentReconciler) SetupWithManager(mgr ctrl.Manag
 			predicate.GenerationChangedPredicate{},
 			predicate.AnnotationChangedPredicate{},
 			predicate.LabelChangedPredicate{}))).
-		Owns(&ansibleeev1.OpenStackAnsibleEE{}).
+		Owns(&batchv1.Job{}).
 		Watches(&certmgrv1.Certificate{},
 			handler.EnqueueRequestsFromMapFunc(certFn)).
 		Complete(r)
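For context, Owns(&batchv1.Job{}) tells controller-runtime to watch Jobs and enqueue a reconcile for the owning OpenStackDataPlaneDeployment whenever one of its Jobs changes. A minimal sketch of that wiring, with a hypothetical ExampleReconciler standing in for the real one (which adds predicates and further watches, as in the hunk above):

package controllers

import (
	"context"

	batchv1 "k8s.io/api/batch/v1"
	ctrl "sigs.k8s.io/controller-runtime"

	dataplanev1 "github.com/openstack-k8s-operators/openstack-operator/apis/dataplane/v1beta1"
)

type ExampleReconciler struct{}

func (r *ExampleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// A change to any Job owned by a Deployment arrives here keyed by the owner.
	return ctrl.Result{}, nil
}

func (r *ExampleReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&dataplanev1.OpenStackDataPlaneDeployment{}).
		Owns(&batchv1.Job{}). // reconcile the owner when its Jobs change
		Complete(r)
}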
4 changes: 2 additions & 2 deletions controllers/dataplane/openstackdataplanenodeset_controller.go
@@ -24,6 +24,7 @@ import (

"github.com/go-playground/validator/v10"
"golang.org/x/exp/slices"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
@@ -48,7 +49,6 @@ import (
"github.com/openstack-k8s-operators/lib-common/modules/common/secret"
"github.com/openstack-k8s-operators/lib-common/modules/common/serviceaccount"
"github.com/openstack-k8s-operators/lib-common/modules/common/util"
ansibleeev1 "github.com/openstack-k8s-operators/openstack-ansibleee-operator/api/v1beta1"
baremetalv1 "github.com/openstack-k8s-operators/openstack-baremetal-operator/api/v1beta1"
openstackv1 "github.com/openstack-k8s-operators/openstack-operator/apis/core/v1beta1"
dataplanev1 "github.com/openstack-k8s-operators/openstack-operator/apis/dataplane/v1beta1"
@@ -622,7 +622,7 @@ func (r *OpenStackDataPlaneNodeSetReconciler) SetupWithManager(
 			predicate.GenerationChangedPredicate{},
 			predicate.AnnotationChangedPredicate{},
 			predicate.LabelChangedPredicate{}))).
-		Owns(&ansibleeev1.OpenStackAnsibleEE{}).
+		Owns(&batchv1.Job{}).
 		Owns(&baremetalv1.OpenStackBaremetalSet{}).
 		Owns(&infranetworkv1.IPSet{}).
 		Owns(&infranetworkv1.DNSData{}).
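Owns() only fires when the created Jobs carry a controller owner reference back to the NodeSet or Deployment that made them. A sketch of the other half of that contract, using controller-runtime's standard helper (the helper name and its call site are assumptions; the real call lives in the job-creation path, which is not part of this diff):

package dataplane

import (
	batchv1 "k8s.io/api/batch/v1"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// ensureOwned stamps the controller owner reference on a Job before creation;
// without it, Owns(&batchv1.Job{}) never enqueues the owner for reconciliation.
func ensureOwned(owner client.Object, job *batchv1.Job, scheme *runtime.Scheme) error {
	return ctrl.SetControllerReference(owner, job, scheme)
}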
2 changes: 0 additions & 2 deletions main.go
@@ -46,7 +46,6 @@ import (
 	neutronv1 "github.com/openstack-k8s-operators/neutron-operator/api/v1beta1"
 	novav1 "github.com/openstack-k8s-operators/nova-operator/api/v1beta1"
 	octaviav1 "github.com/openstack-k8s-operators/octavia-operator/api/v1beta1"
-	ansibleeev1 "github.com/openstack-k8s-operators/openstack-ansibleee-operator/api/v1beta1"
 	baremetalv1 "github.com/openstack-k8s-operators/openstack-baremetal-operator/api/v1beta1"
 	ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1"
 	placementv1 "github.com/openstack-k8s-operators/placement-operator/api/v1beta1"
@@ -108,7 +107,6 @@ func init() {
 	utilruntime.Must(neutronv1.AddToScheme(scheme))
 	utilruntime.Must(octaviav1.AddToScheme(scheme))
 	utilruntime.Must(designatev1.AddToScheme(scheme))
-	utilruntime.Must(ansibleeev1.AddToScheme(scheme))
 	utilruntime.Must(rabbitmqv1.AddToScheme(scheme))
 	utilruntime.Must(manilav1.AddToScheme(scheme))
 	utilruntime.Must(horizonv1.AddToScheme(scheme))
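No AddToScheme call replaces the removed ansibleeev1 registration because batch/v1 ships in client-go's default scheme, which kubebuilder scaffolds register up front. A sketch of that standard registration (assumed here, since that part of main.go sits outside this diff):

package main

import (
	"k8s.io/apimachinery/pkg/runtime"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)

var scheme = runtime.NewScheme()

func init() {
	// Registers the built-in API groups, including batch/v1 Jobs, so the
	// manager's client can handle them without an operator-specific scheme.
	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
}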
32 changes: 19 additions & 13 deletions pkg/dataplane/deployment.go
@@ -26,6 +26,9 @@ import (
"sort"
"strconv"

batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"

slices "golang.org/x/exp/slices"
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
@@ -37,11 +40,9 @@ import (
 	"github.com/openstack-k8s-operators/lib-common/modules/common/helper"
 	"github.com/openstack-k8s-operators/lib-common/modules/common/util"
 	"github.com/openstack-k8s-operators/lib-common/modules/storage"
-	ansibleeev1 "github.com/openstack-k8s-operators/openstack-ansibleee-operator/api/v1beta1"
 	openstackv1 "github.com/openstack-k8s-operators/openstack-operator/apis/core/v1beta1"
 	dataplanev1 "github.com/openstack-k8s-operators/openstack-operator/apis/dataplane/v1beta1"
 	dataplaneutil "github.com/openstack-k8s-operators/openstack-operator/pkg/dataplane/util"
-	corev1 "k8s.io/api/core/v1"
 )

 // Deployer defines a data structure with all of the relevant objects required for a full deployment.
@@ -197,13 +198,13 @@ func (d *Deployer) ConditionalDeploy(
 	}

 	if nsConditions.IsFalse(readyCondition) {
-		var ansibleEE *ansibleeev1.OpenStackAnsibleEE
+		var ansibleEE *batchv1.Job
 		_, labelSelector := dataplaneutil.GetAnsibleExecutionNameAndLabels(&foundService, d.Deployment.Name, d.NodeSet.Name)
 		ansibleEE, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
 		if err != nil {
 			// Return nil if we don't have AnsibleEE available yet
 			if k8s_errors.IsNotFound(err) {
-				log.Info(fmt.Sprintf("%s OpenStackAnsibleEE not yet found", readyCondition))
+				log.Info(fmt.Sprintf("%s AnsibleEE job is not yet found", readyCondition))
 				return nil
 			}
 			log.Error(err, fmt.Sprintf("Error getting ansibleEE job for %s", deployName))
@@ -215,34 +216,39 @@
 				err.Error()))
 		}

-		if ansibleEE.Status.JobStatus == ansibleeev1.JobStatusSucceeded {
+		if ansibleEE.Status.Succeeded > 0 {
 			log.Info(fmt.Sprintf("Condition %s ready", readyCondition))
 			nsConditions.Set(condition.TrueCondition(
 				readyCondition,
 				readyMessage))
 		}

-		if ansibleEE.Status.JobStatus == ansibleeev1.JobStatusRunning || ansibleEE.Status.JobStatus == ansibleeev1.JobStatusPending {
-			log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Status: %s", ansibleEE.Name, ansibleEE.Status.JobStatus))
+		if ansibleEE.Status.Active > 0 {
+			log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleEE.Name, ansibleEE.Status.Active))
Review thread on this hunk (a summary sketch of the Job status counters follows the end of this diff):

Contributor: What about pending job status? I guess we have to check status.active == 0 && status.succeeded == 0 && status.failed == 0?

bshephar (author, Sep 2, 2024): Yeah, true. I don't think this condition makes sense actually, because it checks for an Active and a Failed job. We can probably drop the failed check from here, since it's captured in the next section. So this would just become:

if ansibleEE.Status.Active > 0 {
	log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleEE.Name, ansibleEE.Status.Active))
}

Then the next section will check whether it has failed. In this instance we just want to log when a job is still active; if it had failed, we would want to log something different. We don't really need to check Succeeded, do we? Because if it's > 0 in the check above, we will log that the job has completed. So we only need to check whether it's Active here, since the other conditions are handled by the checks above and below.

Contributor: I think there would be a scenario when all three are zero (status.active == 0, status.succeeded == 0 and status.failed == 0), which is when the job is pending, and we need to update the status with .readyWaitingMessage. I can't see how that is handled here.

bshephar (author): Right, yes, I see what you mean. Like, if the pod for the job was in Pending. I'll see what the job reports in that case to make sure we have it covered.

bshephar (author): Looks like even when the pod is pending, the job reports that it's Active:

❯ oc apply -f test-job.yaml; oc get job pi -o yaml | yq .status.active ; oc get po | grep pi
job.batch/pi created
1
pi-gpwd9                               0/1     Pending      0          0s

bshephar (author): FWIW, I just added a nodeSelector that would result in the pod pending to test that:

❯ yq .spec.template.spec.nodeSelector test-job.yaml
kubernetes.io/arch: aarch64

Only amd64 nodes in the cluster.

bshephar (author): Yeah, I don't think it will ever be 0 for pending jobs. The Active status field for jobs represents "pending and running pods that aren't terminating":
https://github.com/kubernetes/kubernetes/blob/master/pkg/apis/batch/types.go#L524-L528

So this should always be 1 if the Job hasn't culminated in success or failure.
 			nsConditions.Set(condition.FalseCondition(
 				readyCondition,
 				condition.RequestedReason,
 				condition.SeverityInfo,
 				readyWaitingMessage))
 		}

-		if ansibleEE.Status.JobStatus == ansibleeev1.JobStatusFailed {
-			errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s execution.status.jobstatus: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus)
-			ansibleCondition := ansibleEE.Status.Conditions.Get(condition.ReadyCondition)
+		var ansibleCondition *batchv1.JobCondition
+		if ansibleEE.Status.Failed > 0 {
+			errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.Failed)
+			for _, condition := range ansibleEE.Status.Conditions {
+				if condition.Type == batchv1.JobFailed {
+					ansibleCondition = &condition
+				}
+			}
 			if ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded {
-				errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.status.jobstatus: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus)
+				errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleCondition.Message)
 			}
 			log.Info(fmt.Sprintf("Condition %s error", readyCondition))
 			err = fmt.Errorf(errorMsg)
 			nsConditions.Set(condition.FalseCondition(
 				readyCondition,
-				ansibleCondition.Reason,
-				ansibleCondition.Severity,
+				condition.Reason(ansibleCondition.Reason),
+				condition.SeverityError,
 				readyErrorMessage,
 				err.Error()))
 		}
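To summarize the review thread above, here is a sketch (not the PR's code) of how the three batchv1.JobStatus counters map onto the coarse states the controller cares about. Active counts pending and running pods that are not terminating, so a Job whose pod is still Pending already reports Active == 1:

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
)

func jobPhase(s batchv1.JobStatus) string {
	switch {
	case s.Succeeded > 0:
		return "succeeded"
	case s.Failed > 0:
		return "failed"
	case s.Active > 0:
		return "running or pending" // pending pods still count as Active
	default:
		return "no pods yet" // short-lived in practice for a schedulable Job
	}
}

func main() {
	fmt.Println(jobPhase(batchv1.JobStatus{Active: 1})) // "running or pending"
}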
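For the failure branch above, a sketch (plain batch/v1 types; the helper name is hypothetical) of extracting the terminal JobFailed condition. Iterating by index avoids taking the address of a loop variable, and the nil return guards the window where failed pods exist but the terminal condition has not been written yet; a backoff-limit failure arrives with reason "BackoffLimitExceeded":

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// failedCondition returns the JobFailed condition once the Job has terminally failed.
func failedCondition(job *batchv1.Job) *batchv1.JobCondition {
	for i := range job.Status.Conditions {
		c := &job.Status.Conditions[i]
		if c.Type == batchv1.JobFailed && c.Status == corev1.ConditionTrue {
			return c
		}
	}
	return nil
}

func main() {
	job := &batchv1.Job{Status: batchv1.JobStatus{Failed: 1}}
	if c := failedCondition(job); c != nil {
		fmt.Printf("job failed: %s: %s\n", c.Reason, c.Message)
	} else {
		fmt.Println("failed pods observed, but no terminal JobFailed condition yet")
	}
}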
1 change: 0 additions & 1 deletion pkg/dataplane/service.go
@@ -55,7 +55,6 @@ func (d *Deployer) DeployService(foundService dataplanev1.OpenStackDataPlaneServ
 		d.InventorySecrets,
 		d.AeeSpec,
 		d.NodeSet)
-
 	if err != nil {
 		d.Helper.GetLogger().Error(err, fmt.Sprintf("Unable to execute Ansible for %s", foundService.Name))
 		return err