From 28cde104eacede9a1f264f01a5ddbc6eaf3f3416 Mon Sep 17 00:00:00 2001 From: Arjun Mahishi Date: Mon, 29 Apr 2024 13:32:12 +0530 Subject: [PATCH] [CC-26984] metric-export: minor refactoring in prometheus resource * declare a seperate timeout for metric export enable/disable/status refresh * Remove debug logs * Rename retryFunc -> deletePromMetricExport --- ...etric_export_prometheus_config_resource.go | 34 +++++++++++++------ ..._export_prometheus_config_resource_test.go | 20 +++-------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/internal/provider/metric_export_prometheus_config_resource.go b/internal/provider/metric_export_prometheus_config_resource.go index 49f8041b..4b2e6b22 100644 --- a/internal/provider/metric_export_prometheus_config_resource.go +++ b/internal/provider/metric_export_prometheus_config_resource.go @@ -4,6 +4,10 @@ import ( "context" "encoding/json" "fmt" + "net/http" + "strings" + "time" + "github.com/cockroachdb/cockroach-cloud-sdk-go/pkg/client" "github.com/hashicorp/terraform-plugin-framework/path" "github.com/hashicorp/terraform-plugin-framework/resource" @@ -13,8 +17,12 @@ import ( "github.com/hashicorp/terraform-plugin-framework/types" "github.com/hashicorp/terraform-plugin-log/tflog" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry" - "net/http" - "strings" +) + +const ( + metricExportEnableTimeout = time.Minute // This request just triggers an async job that rolls out the integration + metricExportDisableTimeout = time.Minute // Same as above. Different job, but the HTTP request behaves the same. + metricExportStableTimeout = 10 * time.Minute // This might take a while. Stability here means the integration is up and running ) var metricExportPrometheusConfigAttributes = map[string]schema.Attribute{ @@ -105,7 +113,7 @@ func (r *metricExportPrometheusConfigResource) Create( } apiObj := &client.PrometheusMetricExportInfo{} - err = retry.RetryContext(ctx, clusterUpdateTimeout, retryEnablePrometheusMetricExport( + err = retry.RetryContext(ctx, metricExportEnableTimeout, retryEnablePrometheusMetricExport( ctx, r.provider.service, clusterID, cluster, apiObj, )) @@ -116,7 +124,7 @@ func (r *metricExportPrometheusConfigResource) Create( return } - err = retry.RetryContext(ctx, clusterUpdateTimeout, + err = retry.RetryContext(ctx, metricExportStableTimeout, waitForPrometheusMetricExportStableFunc(ctx, clusterID, r.provider.service, apiObj)) if err != nil { resp.Diagnostics.AddError( @@ -183,7 +191,10 @@ func retryEnablePrometheusMetricExport( } func loadPrometheusMetricExportIntoTerraformState( - ID types.String, status types.String, targets types.Map, state *ClusterPrometheusMetricExportConfig, + ID types.String, + status types.String, + targets types.Map, + state *ClusterPrometheusMetricExportConfig, ) { state.ID = ID state.Status = status @@ -199,12 +210,15 @@ func waitForPrometheusMetricExportStableFunc( return func() *retry.RetryError { apiObj, httpResp, err := cl.GetPrometheusMetricExportInfo(ctx, clusterID) if err != nil { - if httpResp != nil && httpResp.StatusCode < http.StatusServiceUnavailable { + if httpResp != nil && httpResp.StatusCode < http.StatusInternalServerError { return retry.NonRetryableError(fmt.Errorf( "error getting Prometheus metric export info: %s", formatAPIErrorMessage(err))) } else { + // if this is a server error, we should retry + tflog.Error(ctx, "encountered a server error while refreshing Prometheus metric export status. Retrying...") return retry.RetryableError(fmt.Errorf( - "encountered a server error while reading Prometheus metric export status - trying again")) + "encountered a server error while refreshing Prometheus metric export status", + )) } } @@ -306,7 +320,7 @@ func (r *metricExportPrometheusConfigResource) Delete( clusterID := state.ID.ValueString() cluster := &client.Cluster{} - retryFunc := func() retry.RetryFunc { + deletePromMetricExport := func() retry.RetryFunc { return func() *retry.RetryError { apiObj, httpResp, err := r.provider.service.DeletePrometheusMetricExport(ctx, clusterID) if err != nil { @@ -341,7 +355,7 @@ func (r *metricExportPrometheusConfigResource) Delete( if apiObj.GetStatus() == client.METRICEXPORTSTATUSTYPE_DISABLING { // We are passing empty PrometheusMetricExportInfo object as we don't need it in stage management. - err = retry.RetryContext(ctx, clusterUpdateTimeout, + err = retry.RetryContext(ctx, metricExportStableTimeout, waitForPrometheusMetricExportStableFunc(ctx, clusterID, r.provider.service, client.NewPrometheusMetricExportInfoWithDefaults())) if err != nil { resp.Diagnostics.AddError( @@ -355,7 +369,7 @@ func (r *metricExportPrometheusConfigResource) Delete( } } - err := retry.RetryContext(ctx, clusterUpdateTimeout, retryFunc()) + err := retry.RetryContext(ctx, metricExportDisableTimeout, deletePromMetricExport()) if err != nil { resp.Diagnostics.AddError( "Error deleting Prometheus metric export config", err.Error(), diff --git a/internal/provider/metric_export_prometheus_config_resource_test.go b/internal/provider/metric_export_prometheus_config_resource_test.go index 9018295f..cc4fc3b6 100644 --- a/internal/provider/metric_export_prometheus_config_resource_test.go +++ b/internal/provider/metric_export_prometheus_config_resource_test.go @@ -3,28 +3,18 @@ package provider import ( "context" "fmt" + "net/http" + "os" + "testing" + "github.com/cockroachdb/cockroach-cloud-sdk-go/pkg/client" mock_client "github.com/cockroachdb/terraform-provider-cockroach/mock" "github.com/golang/mock/gomock" "github.com/google/uuid" "github.com/hashicorp/terraform-plugin-testing/helper/resource" "github.com/hashicorp/terraform-plugin-testing/terraform" - "log" - "net/http" - "os" - "testing" ) -// TestAccMetricExportPrometheusConfigResource attempts to create, check, and destroy -// a real cluster. It will be skipped if TF_ACC isn't set. -func TestAccMetricExportPrometheusConfigResource(t *testing.T) { - t.Skip("Skipping until we can either integrate the AWS provider " + - "or import a permanent test fixture.") - t.Parallel() - clusterName := fmt.Sprintf("%s-prometheus-%s", tfTestPrefix, GenerateRandomString(4)) - testMetricExportPrometheusConfigResource(t, clusterName, false) -} - // TestIntegrationMetricExportPrometheusConfigResource attempts to create, check, // and destroy a cluster, but uses a mocked API service. func TestIntegrationMetricExportPrometheusConfigResource(t *testing.T) { @@ -159,8 +149,6 @@ func testMetricExportPrometheusConfigExists( } clusterID := clusterRs.Primary.Attributes["id"] - log.Printf("[DEBUG] clusterID: %s, name %s", clusterRs.Primary.Attributes["id"], clusterRs.Primary.Attributes["name"]) - _, _, err := p.service.GetPrometheusMetricExportInfo(context.TODO(), clusterID) if err == nil { return nil