diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bb82d87..df1c98ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti ## unreleased +* [ENHANCEMENT] [#737](https://github.com/k8ssandra/cass-operator/issues/737) Before issuing PVC deletion when deleting a datacenter, verify the PVCs that match the labels are not actually used by any pods. + ## v1.23.0 * [CHANGE] [#720](https://github.com/k8ssandra/cass-operator/issues/720) Always use ObjectMeta.Name for the PodDisruptionBudget resource name, not the DatacenterName diff --git a/cmd/main.go b/cmd/main.go index 58bf99d2..efbab670 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -26,11 +26,13 @@ import ( "go.uber.org/zap/zapcore" _ "k8s.io/client-go/plugin/pkg/client/auth" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -119,6 +121,8 @@ func main() { os.Exit(1) } + ctx := ctrl.SetupSignalHandler() + if err = (&controllers.CassandraDatacenterReconciler{ Client: mgr.GetClient(), Log: ctrl.Log.WithName("controllers").WithName("CassandraDatacenter"), @@ -143,6 +147,24 @@ func main() { os.Exit(1) } + if err := mgr.GetCache().IndexField(ctx, &corev1.Pod{}, "spec.volumes.persistentVolumeClaim.claimName", func(obj client.Object) []string { + pod, ok := obj.(*corev1.Pod) + if !ok { + return nil + } + + var pvcNames []string + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + pvcNames = append(pvcNames, volume.PersistentVolumeClaim.ClaimName) + } + } + return pvcNames + }); err != nil { + setupLog.Error(err, "unable to set up field indexer") + os.Exit(1) + } + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { setupLog.Error(err, "unable to set up health check") os.Exit(1) @@ -153,7 +175,7 @@ func main() { } setupLog.Info("starting manager") - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + if err := mgr.Start(ctx); err != nil { setupLog.Error(err, "problem running manager") os.Exit(1) } diff --git a/internal/controllers/cassandra/suite_test.go b/internal/controllers/cassandra/suite_test.go index 711464e8..71125aac 100644 --- a/internal/controllers/cassandra/suite_test.go +++ b/internal/controllers/cassandra/suite_test.go @@ -28,6 +28,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "go.uber.org/zap/zapcore" + corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -97,7 +98,7 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&CassandraDatacenterReconciler{ - Client: k8sClient, + Client: k8sManager.GetClient(), Log: ctrl.Log.WithName("controllers").WithName("CassandraDatacenter"), Scheme: k8sManager.GetScheme(), Recorder: k8sManager.GetEventRecorderFor("cass-operator"), @@ -116,6 +117,21 @@ var _ = BeforeSuite(func() { }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) + Expect(k8sManager.GetCache().IndexField(ctx, &corev1.Pod{}, "spec.volumes.persistentVolumeClaim.claimName", func(obj client.Object) []string { + pod, ok := obj.(*corev1.Pod) + if !ok { + return nil + } + + var pvcNames []string + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + pvcNames = append(pvcNames, volume.PersistentVolumeClaim.ClaimName) + } + } + return pvcNames + })).ToNot(HaveOccurred()) + // Reduce the polling times and sleeps to speed up the tests cooldownPeriod = 1 * time.Millisecond minimumRequeueTime = 10 * time.Millisecond diff --git a/internal/envtest/statefulset_controller.go b/internal/envtest/statefulset_controller.go index af6977b2..f1185ec6 100644 --- a/internal/envtest/statefulset_controller.go +++ b/internal/envtest/statefulset_controller.go @@ -80,7 +80,7 @@ func (r *StatefulSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) // TODO Get existing pods and modify them . podList := &corev1.PodList{} - if err := r.Client.List(ctx, podList, client.MatchingLabels(sts.Spec.Template.Labels), client.InNamespace(req.Namespace)); err != nil { + if err := r.Client.List(ctx, podList, client.MatchingLabels(sts.Labels), client.InNamespace(req.Namespace)); err != nil { logger.Error(err, "Failed to list the pods belonging to this StatefulSet") return ctrl.Result{}, err } @@ -94,7 +94,7 @@ func (r *StatefulSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) if len(stsPods) > intendedReplicas { // We need to delete the pods.. - for i := len(stsPods) - 1; i > intendedReplicas; i-- { + for i := len(stsPods) - 1; i >= intendedReplicas; i-- { pod := stsPods[i] if err := r.Client.Delete(ctx, pod); err != nil { logger.Error(err, "Failed to delete extra pod from this StS") diff --git a/pkg/reconciliation/handler_test.go b/pkg/reconciliation/handler_test.go index f8bcb60c..fdd6dc34 100644 --- a/pkg/reconciliation/handler_test.go +++ b/pkg/reconciliation/handler_test.go @@ -5,16 +5,19 @@ package reconciliation import ( "fmt" + "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" - v1 "k8s.io/api/core/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -91,16 +94,33 @@ func TestProcessDeletion_FailedDelete(t *testing.T) { mockClient := mocks.NewClient(t) rc.Client = mockClient + rc.Datacenter.Spec.Size = 0 k8sMockClientList(mockClient, nil). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{} + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} - }) + }).Twice() + + k8sMockClientGet(mockClient, nil). + Run(func(args mock.Arguments) { + arg := args.Get(2).(*appsv1.StatefulSet) + arg.Spec.Replicas = ptr.To[int32](0) + }).Once() k8sMockClientDelete(mockClient, fmt.Errorf("")) @@ -131,16 +151,31 @@ func TestProcessDeletion(t *testing.T) { k8sMockClientList(mockClient, nil). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{} + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} - }) // ListPods + }).Twice() // ListPods k8sMockClientDelete(mockClient, nil) // Delete PVC k8sMockClientUpdate(mockClient, nil) // Remove dc finalizer + k8sMockClientGet(mockClient, nil). + Run(func(args mock.Arguments) { + arg := args.Get(2).(*appsv1.StatefulSet) + arg.Spec.Replicas = ptr.To[int32](0) + }).Once() emptySecretWatcher(t, rc) diff --git a/pkg/reconciliation/reconcile_datacenter.go b/pkg/reconciliation/reconcile_datacenter.go index ec8c11e8..cf065100 100644 --- a/pkg/reconciliation/reconcile_datacenter.go +++ b/pkg/reconciliation/reconcile_datacenter.go @@ -13,6 +13,7 @@ import ( storagev1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -76,6 +77,30 @@ func (rc *ReconciliationContext) ProcessDeletion() result.ReconcileResult { // How could we have pods if we've decommissioned everything? return result.RequeueSoon(5) } + } else { + // This is small mini reconcile to make everything 0 sized before we finish deletion, but do not run decommission in Cassandra + rc.ReqLogger.Info("Proceeding with deletion, setting all StatefulSets to 0 replicas") + if err := rc.CalculateRackInformation(); err != nil { + return result.Error(err) + } + + if res := rc.CheckRackCreation(); res.Completed() { + return res + } + + waitingForRackScale := false + for _, sts := range rc.statefulSets { + currentReplicas := int(*sts.Spec.Replicas) + if currentReplicas > 0 { + waitingForRackScale = true + if err := rc.UpdateRackNodeCount(sts, 0); err != nil { + return result.Error(err) + } + } + } + if waitingForRackScale { + return result.RequeueSoon(5) + } } // Clean up annotation litter on the user Secrets @@ -125,6 +150,14 @@ func (rc *ReconciliationContext) deletePVCs() error { "numPVCs", len(persistentVolumeClaimList)) for _, pvc := range persistentVolumeClaimList { + + if isBeingUsed, err := rc.isBeingUsed(pvc); err != nil { + logger.Error(err, "Failed to check if PVC is being used") + return err + } else if isBeingUsed { + return fmt.Errorf("PersistentVolumeClaim %s is still being used by a pod", pvc.Name) + } + if err := rc.Client.Delete(rc.Ctx, &pvc); err != nil { logger.Error(err, "Failed to delete PVCs for cassandraDatacenter") return err @@ -138,6 +171,19 @@ func (rc *ReconciliationContext) deletePVCs() error { return nil } +func (rc *ReconciliationContext) isBeingUsed(pvc corev1.PersistentVolumeClaim) (bool, error) { + rc.ReqLogger.Info("reconciler::isBeingUsed") + + pods := &corev1.PodList{} + + if err := rc.Client.List(rc.Ctx, pods, &client.ListOptions{Namespace: pvc.Namespace, FieldSelector: fields.SelectorFromSet(fields.Set{"spec.volumes.persistentVolumeClaim.claimName": pvc.Name})}); err != nil { + rc.ReqLogger.Error(err, "error getting pods for pvc", "pvc", pvc.Name) + return false, err + } + + return len(pods.Items) > 0, nil +} + func (rc *ReconciliationContext) listPVCs(selector map[string]string) ([]corev1.PersistentVolumeClaim, error) { rc.ReqLogger.Info("reconciler::listPVCs") diff --git a/pkg/reconciliation/reconcile_datacenter_test.go b/pkg/reconciliation/reconcile_datacenter_test.go index ca30425d..e7d5f462 100644 --- a/pkg/reconciliation/reconcile_datacenter_test.go +++ b/pkg/reconciliation/reconcile_datacenter_test.go @@ -5,18 +5,20 @@ package reconciliation import ( "fmt" + "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/k8ssandra/cass-operator/pkg/mocks" ) @@ -30,13 +32,23 @@ func TestDeletePVCs(t *testing.T) { k8sMockClientList(mockClient, nil). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{} + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} - }) + }).Twice() k8sMockClientDelete(mockClient, nil) @@ -55,8 +67,8 @@ func TestDeletePVCs_FailedToList(t *testing.T) { k8sMockClientList(mockClient, fmt.Errorf("failed to list PVCs for CassandraDatacenter")). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, @@ -73,24 +85,22 @@ func TestDeletePVCs_FailedToList(t *testing.T) { func TestDeletePVCs_PVCsNotFound(t *testing.T) { rc, _, cleanupMockScr := setupTest() defer cleanupMockScr() + assert := assert.New(t) mockClient := mocks.NewClient(t) rc.Client = mockClient k8sMockClientList(mockClient, errors.NewNotFound(schema.GroupResource{}, "name")). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} }) - err := rc.deletePVCs() - if err != nil { - t.Fatalf("deletePVCs should not have failed") - } + assert.NoError(rc.deletePVCs()) } func TestDeletePVCs_FailedToDelete(t *testing.T) { @@ -102,13 +112,23 @@ func TestDeletePVCs_FailedToDelete(t *testing.T) { k8sMockClientList(mockClient, nil). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{} + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} - }) + }).Twice() k8sMockClientDelete(mockClient, fmt.Errorf("failed to delete")) @@ -120,6 +140,45 @@ func TestDeletePVCs_FailedToDelete(t *testing.T) { assert.EqualError(t, err, "failed to delete") } +func TestDeletePVCs_FailedToDeleteBeingUsed(t *testing.T) { + rc, _, cleanupMockScr := setupTest() + defer cleanupMockScr() + assert := assert.New(t) + + mockClient := mocks.NewClient(t) + rc.Client = mockClient + + k8sMockClientList(mockClient, nil). + Run(func(args mock.Arguments) { + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + }, + }, + } + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc-1", + }, + }} + }).Twice() + + err := rc.deletePVCs() + assert.Error(err) + assert.EqualError(err, "PersistentVolumeClaim pvc-1 is still being used by a pod") +} + func TestStorageExpansionNils(t *testing.T) { rc, _, cleanupMockScr := setupTest() defer cleanupMockScr() diff --git a/pkg/reconciliation/reconcile_racks.go b/pkg/reconciliation/reconcile_racks.go index 963be49f..5ca83058 100644 --- a/pkg/reconciliation/reconcile_racks.go +++ b/pkg/reconciliation/reconcile_racks.go @@ -586,6 +586,7 @@ func (rc *ReconciliationContext) CheckRackLabels() result.ReconcileResult { func (rc *ReconciliationContext) CheckRackStoppedState() result.ReconcileResult { logger := rc.ReqLogger + logger.Info("reconcile_racks::CheckRackStoppedState") emittedStoppingEvent := false racksUpdated := false