ROX-13692: create final snapshot for tenant db #994
Changes from all commits
ea75421
d4b48b7
5c5d39d
2293d52
3d26f7d
cda9636
1a073da
a411a4b
14a3d36
c016838
7f218ad
bdcf920
```diff
@@ -17,7 +17,7 @@ import (
 	"github.com/stretchr/testify/require"
 )

-const awsTimeoutMinutes = 15
+const awsTimeoutMinutes = 30

 func newTestRDS() (*RDS, error) {
 	rdsClient, err := newTestRDSClient()
@@ -47,7 +47,7 @@ func newTestRDSClient() (*rds.RDS, error) {

 func waitForClusterToBeDeleted(ctx context.Context, rdsClient *RDS, clusterID string) (bool, error) {
 	for {
-		clusterExists, clusterStatus, err := rdsClient.clusterStatus(clusterID)
+		clusterExists, _, err := rdsClient.clusterStatus(clusterID)
 		if err != nil {
 			return false, err
 		}
@@ -56,11 +56,6 @@ func waitForClusterToBeDeleted(ctx context.Context, rdsClient *RDS, clusterID st
 			return true, nil
 		}

-		// exit early if cluster is marked as deleting
```
Review thread on the removed early exit:

Reviewer: This code was added to avoid CI flakes, see:
Deleting a DB occasionally takes a very long time, and it doesn't bring much benefit to wait (we're basically testing AWS, not our code). But if this is really needed to check the snapshots, we'll have to increase the timeouts for this test.

Author: The snapshot is not created until a few seconds after the cluster is no longer listed in the RDS API, so unfortunately this is necessary to check that snapshots are created.

Author: Which timeout setting are you referring to?

Reviewer: There are several, and we need to increase them all (I'd suggest doubling the values):
Anyway, if we do this, we'll have another long-running (and possibly still flaky) test in addition to the E2E one.

Author: I agree that this is not ideal, but I don't see a way to improve it right now. I think it is crucial to make sure the final snapshot is created when DBs are deleted. Another solution could be to test this in the regular e2e suite instead of the RDS suite, if you're more comfortable with that.

Author: Incremented all timeouts by 15 minutes. During my tests in this PR I noticed no failures because of timeouts. Nevertheless, I increased them by 15 minutes because at some point the timeout came very close to actually triggering.

Reviewer: The regular e2e suite doesn't test RDS integration at all, so we can't move this there. I didn't know that automatic snapshots are deleted when the DB is deleted; that doesn't sound too good, and I'd follow up with AWS on it. Normally these tests take around 20 minutes, but there are times when they take much longer. I've already had to adjust them twice because of that. It's OK for now, and if they fail in the future we'll increase the timeouts again.
```diff
-		if clusterStatus == dbDeletingStatus {
-			return true, nil
-		}
-
 		ticker := time.NewTicker(awsRetrySeconds * time.Second)
 		select {
 		case <-ticker.C:
@@ -71,6 +66,37 @@ func waitForClusterToBeDeleted(ctx context.Context, rdsClient *RDS, clusterID st
 	}
 }
```
```diff
+func waitForFinalSnapshotToExist(ctx context.Context, rdsClient *RDS, clusterID string) (bool, error) {
+	ticker := time.NewTicker(awsRetrySeconds * time.Second)
+	for {
+		select {
+		case <-ticker.C:
+			snapshotOut, err := rdsClient.rdsClient.DescribeDBClusterSnapshots(&rds.DescribeDBClusterSnapshotsInput{
+				DBClusterSnapshotIdentifier: getFinalSnapshotID(clusterID),
+			})
+
+			if err != nil {
+				if awsErr, ok := err.(awserr.Error); ok {
+					if awsErr.Code() != rds.ErrCodeDBClusterSnapshotNotFoundFault {
+						return false, err
+					}
+
+					continue
+				}
+			}
+
+			if snapshotOut != nil {
+				return len(snapshotOut.DBClusterSnapshots) == 1, nil
+			}
+		case <-ctx.Done():
+			return false, fmt.Errorf("waiting for final DB snapshot: %w", ctx.Err())
+		}
+	}
+}
+
 func TestRDSProvisioning(t *testing.T) {
 	if os.Getenv("RUN_RDS_TESTS") != "true" {
 		t.Skip("Skip RDS tests. Set RUN_RDS_TESTS=true env variable to enable RDS tests.")
```
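The new helper calls getFinalSnapshotID, which this excerpt doesn't show. A plausible sketch, hypothetical since the real helper is defined elsewhere in the repository; the diff only shows that it must return a *string suitable for the AWS SDK's pointer-typed inputs:

```go
package provisioning // hypothetical package name

import (
	"fmt"

	"github.com/aws/aws-sdk-go/aws"
)

// getFinalSnapshotID derives a deterministic final-snapshot identifier from
// the cluster ID. The "-final" naming scheme is an assumption.
func getFinalSnapshotID(clusterID string) *string {
	return aws.String(fmt.Sprintf("%s-final", clusterID))
}
```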
```diff
@@ -136,6 +162,19 @@ func TestRDSProvisioning(t *testing.T) {
 	clusterDeleted, err := waitForClusterToBeDeleted(deleteCtx, rdsClient, clusterID)
 	require.NoError(t, err)
 	assert.True(t, clusterDeleted)

+	// Always attempt to delete the final snapshot if it exists
+	defer func() {
+		_, err := rdsClient.rdsClient.DeleteDBClusterSnapshot(
+			&rds.DeleteDBClusterSnapshotInput{DBClusterSnapshotIdentifier: getFinalSnapshotID(clusterID)},
+		)
+
+		assert.NoError(t, err)
+	}()
+
+	snapshotExists, err := waitForFinalSnapshotToExist(deleteCtx, rdsClient, clusterID)
+	require.NoError(t, err)
+	require.True(t, snapshotExists)
 }

 func TestGetDBConnection(t *testing.T) {
```
Review comment on the diff:

Reviewer: Nit: I think we can just remove this skipFinalSnapshot parameter completely.
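For reference, with aws-sdk-go v1 the final snapshot is requested on the DeleteDBCluster call itself, which is also where a skipFinalSnapshot parameter would collapse into a hard-coded value. A sketch under that assumption — SkipFinalSnapshot and FinalDBSnapshotIdentifier are real DeleteDBClusterInput fields, but deleteClusterWithFinalSnapshot and the surrounding wiring are hypothetical:

```go
package provisioning // hypothetical package name

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/rds"
)

// deleteClusterWithFinalSnapshot always requests a final snapshot, which is
// what dropping the skipFinalSnapshot parameter would imply.
func deleteClusterWithFinalSnapshot(client *rds.RDS, clusterID string) error {
	_, err := client.DeleteDBCluster(&rds.DeleteDBClusterInput{
		DBClusterIdentifier:       aws.String(clusterID),
		SkipFinalSnapshot:         aws.Bool(false),
		FinalDBSnapshotIdentifier: aws.String(clusterID + "-final"), // assumed naming, see getFinalSnapshotID sketch above
	})
	return err
}
```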