diff --git a/.github/workflows/kindIntegTest.yml b/.github/workflows/kindIntegTest.yml index 4baae2c4..0548447e 100644 --- a/.github/workflows/kindIntegTest.yml +++ b/.github/workflows/kindIntegTest.yml @@ -168,7 +168,7 @@ jobs: - webhook_validation # Three worker tests: - canary_upgrade - # - config_change_condition # config_change takes care of testing the same + - config_change_condition #- cdc_successful # OSS only # - delete_node_lost_readiness # DSE specific behavior - host_network diff --git a/.github/workflows/workflow-integration-tests.yaml b/.github/workflows/workflow-integration-tests.yaml index bb36d1e7..cdb5b537 100644 --- a/.github/workflows/workflow-integration-tests.yaml +++ b/.github/workflows/workflow-integration-tests.yaml @@ -183,7 +183,7 @@ jobs: - webhook_validation # Three worker tests: # - canary_upgrade # See kind_40_tests job - # - config_change_condition # config_change takes care of the same testing + - config_change_condition # - cdc_successful # CDC is OSS only , see kind_311_tests and kind_40_tests jobs # - delete_node_lost_readiness # DSE specific behavior see kind_dse_tests job - host_network diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bb82d87..641b6917 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti ## unreleased +* [FEATURE] [#583](https://github.com/k8ssandra/cass-operator/issues/583) If there are pods in a failed state (CrashLoopBackOff, ImagePullBackOff or ErrImagePull), a container/initContainer has a restartCount greater than two with a non-zero termination exit code, or the pod has a FailedScheduling event, allow StatefulSet updates even if previous ones haven't finished rolling out. ForceUpgradeRacks no longer removes itself from the CassandraDatacenter, so the operator does not modify its own Spec.
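Reviewer note on the changelog entry above: since the operator no longer clears spec.forceUpgradeRacks itself, resetting it after a forced roll is now up to the user or an outer controller. Below is a minimal sketch of that cleanup with a controller-runtime client, mirroring the patch the removed CheckRackForceUpgrade code used to issue; clearForceUpgrade is an illustrative name, not something added by this PR.

```go
package example

import (
	"context"

	api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// clearForceUpgrade removes the forceUpgradeRacks trigger once the forced
// update has rolled out, since the operator no longer does this by itself.
func clearForceUpgrade(ctx context.Context, c client.Client, dc *api.CassandraDatacenter) error {
	patch := client.MergeFrom(dc.DeepCopy())
	dc.Spec.ForceUpgradeRacks = nil
	return c.Patch(ctx, dc, patch)
}
```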
+ ## v1.23.0 * [CHANGE] [#720](https://github.com/k8ssandra/cass-operator/issues/720) Always use ObjectMeta.Name for the PodDisruptionBudget resource name, not the DatacenterName diff --git a/Makefile b/Makefile index 74b58f2c..f9b975ae 100644 --- a/Makefile +++ b/Makefile @@ -242,12 +242,12 @@ OPM ?= $(LOCALBIN)/opm ## Tool Versions CERT_MANAGER_VERSION ?= v1.14.7 -KUSTOMIZE_VERSION ?= v5.4.2 +KUSTOMIZE_VERSION ?= v5.5.0 CONTROLLER_TOOLS_VERSION ?= v0.15.0 OPERATOR_SDK_VERSION ?= 1.35.0 HELM_VERSION ?= 3.14.2 OPM_VERSION ?= 1.38.0 -GOLINT_VERSION ?= 1.60.3 +GOLINT_VERSION ?= 1.62.2 .PHONY: cert-manager cert-manager: ## Install cert-manager to the cluster diff --git a/cmd/main.go b/cmd/main.go index 58bf99d2..8210fa17 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -26,11 +26,13 @@ import ( "go.uber.org/zap/zapcore" _ "k8s.io/client-go/plugin/pkg/client/auth" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -119,6 +121,8 @@ func main() { os.Exit(1) } + ctx := ctrl.SetupSignalHandler() + if err = (&controllers.CassandraDatacenterReconciler{ Client: mgr.GetClient(), Log: ctrl.Log.WithName("controllers").WithName("CassandraDatacenter"), @@ -152,8 +156,19 @@ func main() { os.Exit(1) } + if err := mgr.GetCache().IndexField(ctx, &corev1.Event{}, "involvedObject.name", func(obj client.Object) []string { + event := obj.(*corev1.Event) + if event.InvolvedObject.Kind == "Pod" { + return []string{event.InvolvedObject.Name} + } + return []string{} + }); err != nil { + setupLog.Error(err, "unable to set up event index") + os.Exit(1) + } + setupLog.Info("starting manager") - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + if err := mgr.Start(ctx); err != nil { setupLog.Error(err, "problem running manager") os.Exit(1) } diff --git a/pkg/reconciliation/reconcile_racks.go b/pkg/reconciliation/reconcile_racks.go index 963be49f..5592d7a7 100644 --- a/pkg/reconciliation/reconcile_racks.go +++ b/pkg/reconciliation/reconcile_racks.go @@ -17,6 +17,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -166,6 +167,79 @@ func (rc *ReconciliationContext) CheckRackCreation() result.ReconcileResult { return result.Continue() } +func (rc *ReconciliationContext) failureModeDetection() bool { + for _, pod := range rc.dcPods { + if pod == nil { + continue + } + if pod.Status.Phase == corev1.PodPending { + if pod.Status.StartTime == nil || hasBeenXMinutes(5, pod.Status.StartTime.Time) { + // Pod has been over 5 minutes in Pending state. 
This can be normal, but let's see + // if we have some detected failure events like FailedScheduling + events := &corev1.EventList{} + if err := rc.Client.List(rc.Ctx, events, &client.ListOptions{Namespace: pod.Namespace, FieldSelector: fields.SelectorFromSet(fields.Set{"involvedObject.name": pod.Name})}); err != nil { + rc.ReqLogger.Error(err, "error getting events for pod", "pod", pod.Name) + return false + } + + for _, event := range events.Items { + if event.Reason == "FailedScheduling" { + rc.ReqLogger.Info("Found FailedScheduling event for pod", "pod", pod.Name) + // We have a failed scheduling event + return true + } + } + } + } + + // The pod could also be running or terminated; we need to check whether any container is in a crashing state + // Sadly, this state is ephemeral, so it can change between reconciliations + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil { + waitingReason := containerStatus.State.Waiting.Reason + if waitingReason == "CrashLoopBackOff" || + waitingReason == "ImagePullBackOff" || + waitingReason == "ErrImagePull" { + rc.ReqLogger.Info("Failing container state for pod", "pod", pod.Name, "reason", waitingReason) + // We have a container in a failing state + return true + } + } + if containerStatus.RestartCount > 2 { + if containerStatus.State.Terminated != nil { + if containerStatus.State.Terminated.ExitCode != 0 { + rc.ReqLogger.Info("Failing container state for pod", "pod", pod.Name, "exitCode", containerStatus.State.Terminated.ExitCode) + return true + } + } + } + } + // Check the same for initContainers + for _, containerStatus := range pod.Status.InitContainerStatuses { + if containerStatus.State.Waiting != nil { + waitingReason := containerStatus.State.Waiting.Reason + if waitingReason == "CrashLoopBackOff" || + waitingReason == "ImagePullBackOff" || + waitingReason == "ErrImagePull" { + // We have a container in a failing state + rc.ReqLogger.Info("Failing initcontainer state for pod", "pod", pod.Name, "reason", waitingReason) + return true + } + } + if containerStatus.RestartCount > 2 { + if containerStatus.State.Terminated != nil { + if containerStatus.State.Terminated.ExitCode != 0 { + rc.ReqLogger.Info("Failing initcontainer state for pod", "pod", pod.Name, "exitCode", containerStatus.State.Terminated.ExitCode) + return true + } + } + } + } + } + + return false +} + func (rc *ReconciliationContext) UpdateAllowed() bool { // HasAnnotation might require also checking if it's "once / always".. or then we need to validate those allowed values in the webhook return rc.Datacenter.GenerationChanged() || metav1.HasAnnotation(rc.Datacenter.ObjectMeta, api.UpdateAllowedAnnotation) @@ -300,13 +374,22 @@ func (rc *ReconciliationContext) CheckVolumeClaimSizes(statefulSet, desiredSts * return result.Continue() } -func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { +func (rc *ReconciliationContext) CheckRackPodTemplate(force bool) result.ReconcileResult { logger := rc.ReqLogger dc := rc.Datacenter logger.Info("reconcile_racks::CheckRackPodTemplate") for idx := range rc.desiredRackInformation { rackName := rc.desiredRackInformation[idx].RackName + if force { + forceRacks := dc.Spec.ForceUpgradeRacks + if len(forceRacks) > 0 { + if utils.IndexOfString(forceRacks, rackName) < 0 { + continue + } + } + } + if dc.Spec.CanaryUpgrade && idx > 0 { logger. WithValues("rackName", rackName).
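The Event listing in failureModeDetection above only works because cmd/main.go now registers a cache index on involvedObject.name; a unit test driving this path through the fake client would need the same index. A minimal sketch under that assumption follows; podEventIndexer and newFakeClientWithEventIndex are illustrative names, while fake.NewClientBuilder().WithIndex is the controller-runtime hook that provides the field index.

```go
package example

import (
	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// podEventIndexer mirrors the indexer registered in cmd/main.go: it indexes
// Events by the name of the Pod they refer to.
func podEventIndexer(obj client.Object) []string {
	event := obj.(*corev1.Event)
	if event.InvolvedObject.Kind == "Pod" {
		return []string{event.InvolvedObject.Name}
	}
	return []string{}
}

// newFakeClientWithEventIndex builds a fake client whose cache can answer the
// FieldSelector query used by failureModeDetection.
func newFakeClientWithEventIndex(objs ...client.Object) client.Client {
	return fake.NewClientBuilder().
		WithObjects(objs...).
		WithIndex(&corev1.Event{}, "involvedObject.name", podEventIndexer).
		Build()
}
```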
@@ -323,9 +406,9 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { updatedReplicas = status.CurrentReplicas + status.UpdatedReplicas } - if statefulSet.Generation != status.ObservedGeneration || + if !force && (statefulSet.Generation != status.ObservedGeneration || status.Replicas != status.ReadyReplicas || - status.Replicas != updatedReplicas { + status.Replicas != updatedReplicas) { logger.Info( "waiting for upgrade to finish on statefulset", @@ -357,7 +440,7 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { return result.Error(err) } - if !utils.ResourcesHaveSameHash(statefulSet, desiredSts) && !rc.UpdateAllowed() { + if !force && !utils.ResourcesHaveSameHash(statefulSet, desiredSts) && !rc.UpdateAllowed() { logger. WithValues("rackName", rackName). Info("update is blocked, but statefulset needs an update. Marking datacenter as requiring update.") @@ -369,7 +452,7 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { return result.Continue() } - if !utils.ResourcesHaveSameHash(statefulSet, desiredSts) && rc.UpdateAllowed() { + if !utils.ResourcesHaveSameHash(statefulSet, desiredSts) && (force || rc.UpdateAllowed()) { logger. WithValues("rackName", rackName). Info("statefulset needs an update") @@ -397,7 +480,7 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { desiredSts.DeepCopyInto(statefulSet) rc.Recorder.Eventf(rc.Datacenter, corev1.EventTypeNormal, events.UpdatingRack, - "Updating rack %s", rackName) + "Updating rack %s (force: %v)", rackName, force) if err := rc.setConditionStatus(api.DatacenterUpdating, corev1.ConditionTrue); err != nil { return result.Error(err) @@ -423,13 +506,17 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { } } - if err := rc.enableQuietPeriod(20); err != nil { - logger.Error( - err, - "Error when enabling quiet period") - return result.Error(err) + if !force { + if err := rc.enableQuietPeriod(20); err != nil { + logger.Error( + err, + "Error when enabling quiet period") + return result.Error(err) + } } + // TODO Do we really want to modify spec here? + + // we just updated k8s and pods will be knocked out of ready state, so let k8s // call us back when these changes are done and the new pods are back to ready return result.Done() @@ -441,85 +528,22 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult { } func (rc *ReconciliationContext) CheckRackForceUpgrade() result.ReconcileResult { - // This code is *very* similar to CheckRackPodTemplate(), but it's not an exact - copy. Some 3 to 5 line parts could maybe be extracted into functions.
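The `!force` and `force ||` guards above boil down to one rule: a forced update skips the wait for the previous StatefulSet rollout and bypasses the UpdateAllowed gate. A small sketch of that rollout gate as a standalone predicate; shouldWaitForRollout is an illustrative name, not a function added by this PR, and updatedReplicas stands for the value computed just above the changed condition.

```go
package example

import appsv1 "k8s.io/api/apps/v1"

// shouldWaitForRollout reports whether CheckRackPodTemplate should requeue and
// wait for an in-flight StatefulSet rollout instead of applying a new update.
func shouldWaitForRollout(force bool, sts *appsv1.StatefulSet, updatedReplicas int32) bool {
	if force {
		// Forced updates proceed even if the previous rollout has not settled.
		return false
	}
	status := sts.Status
	return sts.Generation != status.ObservedGeneration ||
		status.Replicas != status.ReadyReplicas ||
		status.Replicas != updatedReplicas
}
```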
- logger := rc.ReqLogger dc := rc.Datacenter - logger.Info("starting CheckRackForceUpgrade()") + logger := rc.ReqLogger + logger.Info("reconcile_racks::CheckRackForceUpgrade") + + // Datacenter configuration isn't healthy, we allow upgrades here before pods start + if rc.failureModeDetection() { + logger.Info("Failure detected, forcing CheckRackPodTemplate()") + return rc.CheckRackPodTemplate(true) + } forceRacks := dc.Spec.ForceUpgradeRacks if len(forceRacks) == 0 { return result.Continue() } - for idx, nextRack := range rc.desiredRackInformation { - rackName := rc.desiredRackInformation[idx].RackName - if utils.IndexOfString(forceRacks, rackName) >= 0 { - statefulSet := rc.statefulSets[idx] - - // have to use zero here, because each statefulset is created with no replicas - // in GetStatefulSetForRack() - desiredSts, err := newStatefulSetForCassandraDatacenter(statefulSet, rackName, dc, nextRack.NodeCount) - if err != nil { - logger.Error(err, "error calling newStatefulSetForCassandraDatacenter") - return result.Error(err) - } - - // Set the CassandraDatacenter as the owner and controller - err = setControllerReference( - rc.Datacenter, - desiredSts, - rc.Scheme) - if err != nil { - logger.Error(err, "error calling setControllerReference for statefulset", "desiredSts.Namespace", - desiredSts.Namespace, "desireSts.Name", desiredSts.Name) - return result.Error(err) - } - - // "fix" the replica count, and maintain labels and annotations the k8s admin may have set - desiredSts.Spec.Replicas = statefulSet.Spec.Replicas - desiredSts.Labels = utils.MergeMap(map[string]string{}, statefulSet.Labels, desiredSts.Labels) - desiredSts.Annotations = utils.MergeMap(map[string]string{}, statefulSet.Annotations, desiredSts.Annotations) - - desiredSts.DeepCopyInto(statefulSet) - - rc.Recorder.Eventf(rc.Datacenter, corev1.EventTypeNormal, events.UpdatingRack, - "Force updating rack %s", rackName) - - if err := rc.setConditionStatus(api.DatacenterUpdating, corev1.ConditionTrue); err != nil { - return result.Error(err) - } - - if err := setOperatorProgressStatus(rc, api.ProgressUpdating); err != nil { - return result.Error(err) - } - - logger.Info("Force updating statefulset pod specs", - "statefulSet", statefulSet, - ) - - if err := rc.Client.Update(rc.Ctx, statefulSet); err != nil { - if errors.IsInvalid(err) { - if err = rc.deleteStatefulSet(statefulSet); err != nil { - return result.Error(err) - } - } else { - return result.Error(err) - } - } - } - } - - dcPatch := client.MergeFrom(dc.DeepCopy()) - dc.Spec.ForceUpgradeRacks = nil - - if err := rc.Client.Patch(rc.Ctx, dc, dcPatch); err != nil { - logger.Error(err, "error patching datacenter to clear force upgrade") - return result.Error(err) - } - - logger.Info("done CheckRackForceUpgrade()") - return result.Done() + return rc.CheckRackPodTemplate(true) } func (rc *ReconciliationContext) deleteStatefulSet(statefulSet *appsv1.StatefulSet) error { @@ -2595,7 +2619,7 @@ func (rc *ReconciliationContext) ReconcileAllRacks() (reconcile.Result, error) { return recResult.Output() } - if recResult := rc.CheckRackPodTemplate(); recResult.Completed() { + if recResult := rc.CheckRackPodTemplate(false); recResult.Completed() { return recResult.Output() } diff --git a/pkg/reconciliation/reconcile_racks_test.go b/pkg/reconciliation/reconcile_racks_test.go index bf062e87..2ca2e65f 100644 --- a/pkg/reconciliation/reconcile_racks_test.go +++ b/pkg/reconciliation/reconcile_racks_test.go @@ -285,7 +285,7 @@ func TestCheckRackPodTemplate_SetControllerRefOnStatefulSet(t 
*testing.T) { } rc.Datacenter.Spec.PodTemplateSpec = podTemplateSpec - result = rc.CheckRackPodTemplate() + result = rc.CheckRackPodTemplate(false) assert.True(t, result.Completed()) assert.Equal(t, 1, invocations) @@ -312,7 +312,7 @@ func TestCheckRackPodTemplate_CanaryUpgrade(t *testing.T) { t.Fatalf("failed to add rack to cassandradatacenter: %s", err) } - result = rc.CheckRackPodTemplate() + result = rc.CheckRackPodTemplate(false) _, err := result.Output() assert.True(t, result.Completed()) @@ -323,7 +323,7 @@ func TestCheckRackPodTemplate_CanaryUpgrade(t *testing.T) { rc.Datacenter.Spec.ServerVersion = "6.8.44" partition := rc.Datacenter.Spec.CanaryUpgradeCount - result = rc.CheckRackPodTemplate() + result = rc.CheckRackPodTemplate(false) _, err = result.Output() assert.True(t, result.Completed()) @@ -352,7 +352,7 @@ func TestCheckRackPodTemplate_CanaryUpgrade(t *testing.T) { rc.Datacenter.Spec.CanaryUpgrade = false - result = rc.CheckRackPodTemplate() + result = rc.CheckRackPodTemplate(false) assert.True(t, result.Completed()) assert.NotEqual(t, expectedStrategy, rc.statefulSets[0].Spec.UpdateStrategy) } @@ -371,7 +371,7 @@ func TestCheckRackPodTemplate_GenerationCheck(t *testing.T) { rc.Datacenter.Status.ObservedGeneration = rc.Datacenter.Generation rc.Datacenter.Spec.ServerVersion = "6.8.44" - res = rc.CheckRackPodTemplate() + res = rc.CheckRackPodTemplate(false) assert.Equal(result.Continue(), res) cond, found := rc.Datacenter.GetCondition(api.DatacenterRequiresUpdate) assert.True(found) @@ -388,7 +388,7 @@ func TestCheckRackPodTemplate_GenerationCheck(t *testing.T) { metav1.SetMetaDataAnnotation(&rc.Datacenter.ObjectMeta, api.UpdateAllowedAnnotation, string(api.AllowUpdateAlways)) rc.Datacenter.Spec.ServerVersion = "6.8.44" // This needs to be reapplied, since we call Patch in the CheckRackPodTemplate() - res = rc.CheckRackPodTemplate() + res = rc.CheckRackPodTemplate(false) assert.True(res.Completed()) } @@ -441,7 +441,7 @@ func TestCheckRackPodTemplate_TemplateLabels(t *testing.T) { rc.statefulSets = make([]*appsv1.StatefulSet, len(rackInfo)) rc.statefulSets[0] = desiredStatefulSet - res := rc.CheckRackPodTemplate() + res := rc.CheckRackPodTemplate(false) require.Equal(result.Done(), res) rc.statefulSets[0].Status.ObservedGeneration = rc.statefulSets[0].Generation @@ -452,7 +452,7 @@ func TestCheckRackPodTemplate_TemplateLabels(t *testing.T) { // Now update the template and verify that the StatefulSet is updated rc.Datacenter.Spec.PodTemplateSpec.ObjectMeta.Labels["foo2"] = "baz" rc.Datacenter.Generation++ - res = rc.CheckRackPodTemplate() + res = rc.CheckRackPodTemplate(false) require.Equal(result.Done(), res) sts = &appsv1.StatefulSet{} @@ -2733,7 +2733,7 @@ func TestCheckRackPodTemplateWithVolumeExpansion(t *testing.T) { res := rc.CheckRackCreation() require.False(res.Completed(), "CheckRackCreation did not complete as expected") - require.Equal(result.Continue(), rc.CheckRackPodTemplate()) + require.Equal(result.Continue(), rc.CheckRackPodTemplate(false)) metav1.SetMetaDataAnnotation(&rc.Datacenter.ObjectMeta, api.AllowStorageChangesAnnotation, "true") require.NoError(rc.Client.Update(rc.Ctx, rc.Datacenter)) @@ -2757,11 +2757,11 @@ func TestCheckRackPodTemplateWithVolumeExpansion(t *testing.T) { require.NoError(rc.Client.Create(rc.Ctx, pvc)) } - require.Equal(result.Continue(), rc.CheckRackPodTemplate()) + require.Equal(result.Continue(), rc.CheckRackPodTemplate(false)) rc.Datacenter.Spec.StorageConfig.CassandraDataVolumeClaimSpec.Resources.Requests = 
map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi")} require.NoError(rc.Client.Update(rc.Ctx, rc.Datacenter)) - res = rc.CheckRackPodTemplate() + res = rc.CheckRackPodTemplate(false) _, err := res.Output() require.EqualError(err, "PVC resize requested, but StorageClass standard does not support expansion", "We should have an error, storageClass does not support expansion") @@ -2771,14 +2771,14 @@ func TestCheckRackPodTemplateWithVolumeExpansion(t *testing.T) { storageClass.AllowVolumeExpansion = ptr.To[bool](true) require.NoError(rc.Client.Update(rc.Ctx, storageClass)) - res = rc.CheckRackPodTemplate() + res = rc.CheckRackPodTemplate(false) require.Equal(result.Done(), res, "Recreating StS should throw us to silence period") require.NoError(rc.Client.Get(rc.Ctx, nsName, sts)) require.Equal(resource.MustParse("2Gi"), sts.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) // The fakeClient behavior does not prevent us from modifying the StS fields, so this test behaves unlike real world in that sense - res = rc.CheckRackPodTemplate() + res = rc.CheckRackPodTemplate(false) require.Equal(result.Continue(), res, "Recreating StS should throw us to silence period") } diff --git a/tests/config_change_condition/config_change_condition_suite_test.go b/tests/config_change_condition/config_change_condition_suite_test.go index 3a2a26d1..40e0c9cb 100644 --- a/tests/config_change_condition/config_change_condition_suite_test.go +++ b/tests/config_change_condition/config_change_condition_suite_test.go @@ -18,12 +18,13 @@ import ( ) var ( - testName = "Config change condition" - namespace = "test-config-change-condition" - dcName = "dc2" - dcYaml = "../testdata/default-single-rack-2-node-dc.yaml" - dcResource = fmt.Sprintf("CassandraDatacenter/%s", dcName) - ns = ginkgo_util.NewWrapper(testName, namespace) + testName = "Config change condition with failure" + namespace = "test-config-change-condition" + dcName = "dc1" + clusterName = "cluster1" + dcYaml = "../testdata/default-three-rack-three-node-dc-zones.yaml" + dcResource = fmt.Sprintf("CassandraDatacenter/%s", dcName) + ns = ginkgo_util.NewWrapper(testName, namespace) ) func TestLifecycle(t *testing.T) { @@ -55,22 +56,33 @@ var _ = Describe(testName, func() { ns.WaitForOperatorReady() - step := "creating a datacenter resource with 1 racks/2 nodes" + step := "creating a datacenter resource with 3 racks/3 nodes using unavailable zones" testFile, err := ginkgo_util.CreateTestFile(dcYaml) Expect(err).ToNot(HaveOccurred()) k := kubectl.ApplyFiles(testFile) ns.ExecAndLog(step, k) - ns.WaitForDatacenterReady(dcName) + // Wait for status to be Unschedulable + step = "waiting the nodes to be unschedulable" + json := `jsonpath={.status.conditions[?(@.type=="PodScheduled")].status}` + k = kubectl.Get(fmt.Sprintf("pod/%s-%s-r1-sts-0", clusterName, dcName)). + FormatOutput(json) + Expect(ns.WaitForOutputContains(k, "False", 30)).ToNot(HaveOccurred()) + + json = `jsonpath={.status.conditions[?(@.type=="PodScheduled")].reason}` + k = kubectl.Get(fmt.Sprintf("pod/%s-%s-r1-sts-0", clusterName, dcName)). 
+ FormatOutput(json) + ns.WaitForOutputContainsAndLog(step, k, "Unschedulable", 30) - step = "change the config" - json := ginkgo_util.CreateTestJson("{\"spec\": {\"config\": {\"cassandra-yaml\": {\"roles_validity\": \"256000ms\"}, \"jvm-server-options\": {\"garbage_collector\": \"CMS\"}}}}") + step = "change the config by removing zones" + json = `{"spec": { "racks": [{"name": "r1"}, {"name": "r2"}, {"name": "r3"}]}}` k = kubectl.PatchMerge(dcResource, json) ns.ExecAndLog(step, k) ns.WaitForDatacenterCondition(dcName, "Updating", string(corev1.ConditionTrue)) ns.WaitForDatacenterCondition(dcName, "Updating", string(corev1.ConditionFalse)) + ns.WaitForDatacenterReady(dcName) ns.WaitForDatacenterOperatorProgress(dcName, "Ready", 1800) }) }) diff --git a/tests/testdata/default-three-rack-three-node-dc-zones.yaml b/tests/testdata/default-three-rack-three-node-dc-zones.yaml new file mode 100644 index 00000000..4f258b3b --- /dev/null +++ b/tests/testdata/default-three-rack-three-node-dc-zones.yaml @@ -0,0 +1,54 @@ +apiVersion: cassandra.datastax.com/v1beta1 +kind: CassandraDatacenter +metadata: + name: dc1 +spec: + clusterName: cluster1 + serverType: cassandra + serverVersion: "5.0.2" + managementApiAuth: + insecure: {} + size: 3 + storageConfig: + cassandraDataVolumeClaimSpec: + storageClassName: standard + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + racks: + - name: r1 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - europe-north1-a + - name: r2 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - europe-north1-b + - name: r3 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - europe-north1-c + config: + jvm-server-options: + initial_heap_size: "512m" + max_heap_size: "512m"
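For reference, the jsonpath assertions in the updated test inspect the pod's PodScheduled condition, which is exactly what the zone-pinned racks above are designed to trip. A minimal Go sketch of the same check; isUnschedulable is an illustrative helper, not part of the test suite.

```go
package example

import corev1 "k8s.io/api/core/v1"

// isUnschedulable reports whether the scheduler has marked the pod as
// Unschedulable, the state the test waits for before patching the racks.
func isUnschedulable(pod *corev1.Pod) bool {
	for _, cond := range pod.Status.Conditions {
		if cond.Type == corev1.PodScheduled &&
			cond.Status == corev1.ConditionFalse &&
			cond.Reason == corev1.PodReasonUnschedulable {
			return true
		}
	}
	return false
}
```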