From ea754211dd9f866ba1a5d0770cc714401b8940da Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Thu, 27 Apr 2023 15:14:10 +0200 Subject: [PATCH 01/12] create final snapshot for db-clusters --- fleetshard/pkg/central/cloudprovider/awsclient/rds.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds.go index a0d232aff2..29ed05ecb5 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds.go @@ -188,7 +188,7 @@ func (r *RDS) ensureClusterDeleted(clusterID string) error { if clusterStatus != dbDeletingStatus { glog.Infof("Initiating deprovisioning of RDS database cluster %s.", clusterID) // TODO(ROX-13692): do not skip taking a final DB snapshot - _, err := r.rdsClient.DeleteDBCluster(newDeleteCentralDBClusterInput(clusterID, true)) + _, err := r.rdsClient.DeleteDBCluster(newDeleteCentralDBClusterInput(clusterID, false)) if err != nil { return fmt.Errorf("deleting DB cluster: %w", err) } @@ -366,10 +366,16 @@ func newDeleteCentralDBInstanceInput(instanceID string, skipFinalSnapshot bool) } func newDeleteCentralDBClusterInput(clusterID string, skipFinalSnapshot bool) *rds.DeleteDBClusterInput { - return &rds.DeleteDBClusterInput{ + input := &rds.DeleteDBClusterInput{ DBClusterIdentifier: aws.String(clusterID), SkipFinalSnapshot: aws.Bool(skipFinalSnapshot), } + + if !skipFinalSnapshot { + input.FinalDBSnapshotIdentifier = aws.String(fmt.Sprintf("%s-%s", clusterID, "final")) + } + + return input } func newRdsClient(awsConfig config.AWS, auth fleetmanager.Auth) (*rds.RDS, error) { From d4b48b70bf15ff5db0f0f8128aec1f48611ba347 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Thu, 27 Apr 2023 15:22:15 +0200 Subject: [PATCH 02/12] rds tests require final snapshot --- fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index ecc67b6453..e0460bde64 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -136,6 +136,12 @@ func TestRDSProvisioning(t *testing.T) { clusterDeleted, err := waitForClusterToBeDeleted(deleteCtx, rdsClient, clusterID) require.NoError(t, err) assert.True(t, clusterDeleted) + + // test that final snapshot was created + snapshotOut, err := rdsClient.rdsClient.DescribeDBSnapshots(&rds.DescribeDBSnapshotsInput{DBSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final"))}) + require.NoError(t, err) + require.NotNil(t, snapshotOut) + require.NotEmpty(t, snapshotOut.DBSnapshots) } func TestGetDBConnection(t *testing.T) { From 5c5d39d71c5adf4a5041d0282e9d6c2169f23033 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Fri, 28 Apr 2023 08:56:42 +0200 Subject: [PATCH 03/12] remove early exit of test wait for cluster to test final snapshot creation --- fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index e0460bde64..ad423ea049 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -47,7 +47,7 @@ func newTestRDSClient() (*rds.RDS, error) { func waitForClusterToBeDeleted(ctx context.Context, rdsClient *RDS, clusterID string) (bool, error) { for { - clusterExists, clusterStatus, err := rdsClient.clusterStatus(clusterID) + clusterExists, _, err := rdsClient.clusterStatus(clusterID) if err != nil { return false, err } @@ -56,11 +56,6 @@ func waitForClusterToBeDeleted(ctx context.Context, rdsClient *RDS, clusterID st return true, nil } - // exit early if cluster is marked as deleting - if clusterStatus == dbDeletingStatus { - return true, nil - } - ticker := time.NewTicker(awsRetrySeconds * time.Second) select { case <-ticker.C: From 2293d52befc13ef5e7a9b50da4e82fa220db509a Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Fri, 28 Apr 2023 08:58:07 +0200 Subject: [PATCH 04/12] format change --- fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index ad423ea049..3f6d0082d8 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -133,7 +133,9 @@ func TestRDSProvisioning(t *testing.T) { assert.True(t, clusterDeleted) // test that final snapshot was created - snapshotOut, err := rdsClient.rdsClient.DescribeDBSnapshots(&rds.DescribeDBSnapshotsInput{DBSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final"))}) + snapshotOut, err := rdsClient.rdsClient.DescribeDBSnapshots(&rds.DescribeDBSnapshotsInput{ + DBSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final")), + }) require.NoError(t, err) require.NotNil(t, snapshotOut) require.NotEmpty(t, snapshotOut.DBSnapshots) From 3d26f7dc59d5d66522d9cbc8cf50b2c6a995c43f Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Fri, 28 Apr 2023 10:24:07 +0200 Subject: [PATCH 05/12] wait for final snapshot in test --- .../cloudprovider/awsclient/rds_test.go | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index 3f6d0082d8..461b04b89d 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -66,6 +66,37 @@ func waitForClusterToBeDeleted(ctx context.Context, rdsClient *RDS, clusterID st } } +func waitForFinalSnapshotToExist(ctx context.Context, rdsClient *RDS, clusterID string) (bool, error) { + + ticker := time.NewTicker(awsRetrySeconds * time.Second) + for { + select { + case <-ticker.C: + snapshotOut, err := rdsClient.rdsClient.DescribeDBSnapshots(&rds.DescribeDBSnapshotsInput{ + DBSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final")), + }) + + if err != nil { + if awsErr, ok := err.(awserr.Error); ok { + if awsErr.Code() != rds.ErrCodeDBClusterSnapshotNotFoundFault { + return false, err + } + + continue + } + } + + if snapshotOut != nil { + return len(snapshotOut.DBSnapshots) == 1, nil + } + case <-ctx.Done(): + return false, fmt.Errorf("waiting for final DB snapshot: %w", ctx.Err()) + } + + } + +} + func TestRDSProvisioning(t *testing.T) { if os.Getenv("RUN_RDS_TESTS") != "true" { t.Skip("Skip RDS tests. Set RUN_RDS_TESTS=true env variable to enable RDS tests.") @@ -133,12 +164,9 @@ func TestRDSProvisioning(t *testing.T) { assert.True(t, clusterDeleted) // test that final snapshot was created - snapshotOut, err := rdsClient.rdsClient.DescribeDBSnapshots(&rds.DescribeDBSnapshotsInput{ - DBSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final")), - }) + snapshotExists, err := waitForFinalSnapshotToExist(deleteCtx, rdsClient, clusterID) require.NoError(t, err) - require.NotNil(t, snapshotOut) - require.NotEmpty(t, snapshotOut.DBSnapshots) + require.True(t, snapshotExists) } func TestGetDBConnection(t *testing.T) { From cda9636730b26e246ac7e860a81497dda0b53c3e Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Fri, 28 Apr 2023 10:47:40 +0200 Subject: [PATCH 06/12] fix aws err code for wait for snapshot in tests --- fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index 461b04b89d..aafa4fe381 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -78,7 +78,7 @@ func waitForFinalSnapshotToExist(ctx context.Context, rdsClient *RDS, clusterID if err != nil { if awsErr, ok := err.(awserr.Error); ok { - if awsErr.Code() != rds.ErrCodeDBClusterSnapshotNotFoundFault { + if awsErr.Code() != rds.ErrCodeDBSnapshotNotFoundFault { return false, err } From 1a073dab99c28e2e2620f5315117e8b4c6b08619 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Fri, 28 Apr 2023 11:18:11 +0200 Subject: [PATCH 07/12] change final snapshot test code to use dbclustersnapshot instead of dbsnapshot --- .github/workflows/rds.yaml | 1 + .../pkg/central/cloudprovider/awsclient/rds_test.go | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/rds.yaml b/.github/workflows/rds.yaml index c16b2252f7..00e13deb0b 100644 --- a/.github/workflows/rds.yaml +++ b/.github/workflows/rds.yaml @@ -32,6 +32,7 @@ on: - 'docs/**' - 'pkg/api/openapi/docs/**' - 'pkg/api/openapi/.openapi-generator-ignore' + - 'dp-terraform/**' jobs: verify-test: diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index aafa4fe381..83a71f3e31 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -72,13 +72,13 @@ func waitForFinalSnapshotToExist(ctx context.Context, rdsClient *RDS, clusterID for { select { case <-ticker.C: - snapshotOut, err := rdsClient.rdsClient.DescribeDBSnapshots(&rds.DescribeDBSnapshotsInput{ - DBSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final")), + snapshotOut, err := rdsClient.rdsClient.DescribeDBClusterSnapshots(&rds.DescribeDBClusterSnapshotsInput{ + DBClusterSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final")), }) if err != nil { if awsErr, ok := err.(awserr.Error); ok { - if awsErr.Code() != rds.ErrCodeDBSnapshotNotFoundFault { + if awsErr.Code() != rds.ErrCodeDBClusterSnapshotNotFoundFault { return false, err } @@ -87,7 +87,7 @@ func waitForFinalSnapshotToExist(ctx context.Context, rdsClient *RDS, clusterID } if snapshotOut != nil { - return len(snapshotOut.DBSnapshots) == 1, nil + return len(snapshotOut.DBClusterSnapshots) == 1, nil } case <-ctx.Done(): return false, fmt.Errorf("waiting for final DB snapshot: %w", ctx.Err()) From a411a4bbf307b0262d713c229bb0a63f1c510200 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Tue, 2 May 2023 13:45:11 +0200 Subject: [PATCH 08/12] PR feedback --- fleetshard/pkg/central/cloudprovider/awsclient/rds.go | 8 +++++--- .../pkg/central/cloudprovider/awsclient/rds_test.go | 10 +++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds.go index 29ed05ecb5..cb5ce9e98e 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds.go @@ -166,7 +166,6 @@ func (r *RDS) ensureInstanceDeleted(instanceID string) error { if instanceStatus != dbDeletingStatus { glog.Infof("Initiating deprovisioning of RDS database instance %s.", instanceID) - // TODO(ROX-13692): do not skip taking a final DB snapshot _, err := r.rdsClient.DeleteDBInstance(newDeleteCentralDBInstanceInput(instanceID, true)) if err != nil { return fmt.Errorf("deleting DB instance: %w", err) @@ -187,7 +186,6 @@ func (r *RDS) ensureClusterDeleted(clusterID string) error { if clusterStatus != dbDeletingStatus { glog.Infof("Initiating deprovisioning of RDS database cluster %s.", clusterID) - // TODO(ROX-13692): do not skip taking a final DB snapshot _, err := r.rdsClient.DeleteDBCluster(newDeleteCentralDBClusterInput(clusterID, false)) if err != nil { return fmt.Errorf("deleting DB cluster: %w", err) @@ -372,7 +370,7 @@ func newDeleteCentralDBClusterInput(clusterID string, skipFinalSnapshot bool) *r } if !skipFinalSnapshot { - input.FinalDBSnapshotIdentifier = aws.String(fmt.Sprintf("%s-%s", clusterID, "final")) + input.FinalDBSnapshotIdentifier = getFinalSnapshotID(clusterID) } return input @@ -401,6 +399,10 @@ func newRdsClient(awsConfig config.AWS, auth fleetmanager.Auth) (*rds.RDS, error return rds.New(sess), nil } +func getFinalSnapshotID(clusterID string) *string { + return aws.String(fmt.Sprintf("%s-%s", clusterID, "final")) +} + type tokenFetcher struct { auth fleetmanager.Auth } diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index 83a71f3e31..df5d107d1a 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -163,7 +163,15 @@ func TestRDSProvisioning(t *testing.T) { require.NoError(t, err) assert.True(t, clusterDeleted) - // test that final snapshot was created + // Always attemt to delete the final snapshot if it exists + defer func() { + _, err := rdsClient.rdsClient.DeleteDBClusterSnapshot( + &rds.DeleteDBClusterSnapshotInput{DBClusterSnapshotIdentifier: getFinalSnapshotID(clusterID)}, + ) + + assert.NoError(t, err) + }() + snapshotExists, err := waitForFinalSnapshotToExist(deleteCtx, rdsClient, clusterID) require.NoError(t, err) require.True(t, snapshotExists) From 14a3d3629759c627b5990bd4c511b4baa93f391e Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Tue, 2 May 2023 16:23:01 +0200 Subject: [PATCH 09/12] change RDS timeout configurations --- .github/workflows/rds.yaml | 2 +- Makefile | 2 +- fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rds.yaml b/.github/workflows/rds.yaml index 00e13deb0b..f6101d5f5c 100644 --- a/.github/workflows/rds.yaml +++ b/.github/workflows/rds.yaml @@ -68,4 +68,4 @@ jobs: AWS_AUTH_HELPER: "none" run: | ./dev/env/scripts/exec_fleetshard_sync.sh make test/rds - timeout-minutes: 35 + timeout-minutes: 50 diff --git a/Makefile b/Makefile index 2bedbc3eef..f43ba5b476 100644 --- a/Makefile +++ b/Makefile @@ -326,7 +326,7 @@ test: $(GOTESTSUM_BIN) # Runs the AWS RDS integration tests. test/rds: $(GOTESTSUM_BIN) RUN_RDS_TESTS=true \ - $(GOTESTSUM_BIN) --junitfile data/results/rds-integration-tests.xml --format $(GOTESTSUM_FORMAT) -- -p 1 -v -timeout 30m -count=1 \ + $(GOTESTSUM_BIN) --junitfile data/results/rds-integration-tests.xml --format $(GOTESTSUM_FORMAT) -- -p 1 -v -timeout 45m -count=1 \ ./fleetshard/pkg/central/cloudprovider/awsclient/... .PHONY: test/rds diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index df5d107d1a..50883546f4 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -17,7 +17,7 @@ import ( "github.com/stretchr/testify/require" ) -const awsTimeoutMinutes = 15 +const awsTimeoutMinutes = 30 func newTestRDS() (*RDS, error) { rdsClient, err := newTestRDSClient() From c016838d54c67c9bccd3f7060aac26509ac998f3 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Wed, 3 May 2023 08:59:19 +0200 Subject: [PATCH 10/12] PR fix --- fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go index 50883546f4..9c76300068 100644 --- a/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go +++ b/fleetshard/pkg/central/cloudprovider/awsclient/rds_test.go @@ -73,7 +73,7 @@ func waitForFinalSnapshotToExist(ctx context.Context, rdsClient *RDS, clusterID select { case <-ticker.C: snapshotOut, err := rdsClient.rdsClient.DescribeDBClusterSnapshots(&rds.DescribeDBClusterSnapshotsInput{ - DBClusterSnapshotIdentifier: aws.String(fmt.Sprintf("%s-%s", clusterID, "final")), + DBClusterSnapshotIdentifier: getFinalSnapshotID(clusterID), }) if err != nil { From 7f218adbb46a1d818b486c72aad8ba6b6727b5d8 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Wed, 3 May 2023 10:52:01 +0200 Subject: [PATCH 11/12] higher wait timeout for openshift-ci --- dev/env/defaults/00-defaults.env | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/env/defaults/00-defaults.env b/dev/env/defaults/00-defaults.env index dc1e2c192a..32e741119a 100644 --- a/dev/env/defaults/00-defaults.env +++ b/dev/env/defaults/00-defaults.env @@ -64,3 +64,4 @@ export RHACS_OPERATOR_RESOURCES_DEFAULTS='{"requests":{"cpu":"200m","memory":"30 export ENABLE_EXTERNAL_CONFIG_DEFAULT=false export AWS_AUTH_HELPER_DEFAULT="" +export WAIT_TIMEOUT="10m" From bdcf92032a53dab6b9e10606165637c5e41ad178 Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Wed, 3 May 2023 13:40:26 +0200 Subject: [PATCH 12/12] revert timeout change --- dev/env/defaults/00-defaults.env | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/env/defaults/00-defaults.env b/dev/env/defaults/00-defaults.env index 32e741119a..dc1e2c192a 100644 --- a/dev/env/defaults/00-defaults.env +++ b/dev/env/defaults/00-defaults.env @@ -64,4 +64,3 @@ export RHACS_OPERATOR_RESOURCES_DEFAULTS='{"requests":{"cpu":"200m","memory":"30 export ENABLE_EXTERNAL_CONFIG_DEFAULT=false export AWS_AUTH_HELPER_DEFAULT="" -export WAIT_TIMEOUT="10m"