Skip to content

Commit

Permalink
[CC-26984] metric-export: minor refactoring in prometheus resource
Browse files Browse the repository at this point in the history
* declare a seperate timeout for metric export enable/disable/status
  refresh
* Remove debug logs
* Rename retryFunc -> deletePromMetricExport
  • Loading branch information
arjunmahishi authored and aa-joshi committed May 17, 2024
1 parent 417f542 commit 9e502ed
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 77 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [1.7.0]

## Added

Expand All @@ -26,7 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
to true, attempts to delete the cluster will fail. Set to false to disable
delete protection.

## [1.5.0] - 2024-05-26
## [1.5.0] - 2024-04-26

- No changes.

Expand Down
34 changes: 24 additions & 10 deletions internal/provider/metric_export_prometheus_config_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ import (
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"

"github.com/cockroachdb/cockroach-cloud-sdk-go/pkg/client"
"github.com/hashicorp/terraform-plugin-framework/path"
"github.com/hashicorp/terraform-plugin-framework/resource"
Expand All @@ -13,8 +17,12 @@ import (
"github.com/hashicorp/terraform-plugin-framework/types"
"github.com/hashicorp/terraform-plugin-log/tflog"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
"net/http"
"strings"
)

const (
metricExportEnableTimeout = time.Minute // This request just triggers an async job that rolls out the integration
metricExportDisableTimeout = time.Minute // Same as above. Different job, but the HTTP request behaves the same.
metricExportStableTimeout = 10 * time.Minute // This might take a while. Stability here means the integration is up and running
)

var metricExportPrometheusConfigAttributes = map[string]schema.Attribute{
Expand Down Expand Up @@ -105,7 +113,7 @@ func (r *metricExportPrometheusConfigResource) Create(
}

apiObj := &client.PrometheusMetricExportInfo{}
err = retry.RetryContext(ctx, clusterUpdateTimeout, retryEnablePrometheusMetricExport(
err = retry.RetryContext(ctx, metricExportEnableTimeout, retryEnablePrometheusMetricExport(
ctx, r.provider.service, clusterID, cluster,
apiObj,
))
Expand All @@ -116,7 +124,7 @@ func (r *metricExportPrometheusConfigResource) Create(
return
}

err = retry.RetryContext(ctx, clusterUpdateTimeout,
err = retry.RetryContext(ctx, metricExportStableTimeout,
waitForPrometheusMetricExportStableFunc(ctx, clusterID, r.provider.service, apiObj))
if err != nil {
resp.Diagnostics.AddError(
Expand Down Expand Up @@ -183,7 +191,10 @@ func retryEnablePrometheusMetricExport(
}

func loadPrometheusMetricExportIntoTerraformState(
ID types.String, status types.String, targets types.Map, state *ClusterPrometheusMetricExportConfig,
ID types.String,
status types.String,
targets types.Map,
state *ClusterPrometheusMetricExportConfig,
) {
state.ID = ID
state.Status = status
Expand All @@ -199,12 +210,15 @@ func waitForPrometheusMetricExportStableFunc(
return func() *retry.RetryError {
apiObj, httpResp, err := cl.GetPrometheusMetricExportInfo(ctx, clusterID)
if err != nil {
if httpResp != nil && httpResp.StatusCode < http.StatusServiceUnavailable {
if httpResp != nil && httpResp.StatusCode < http.StatusInternalServerError {
return retry.NonRetryableError(fmt.Errorf(
"error getting Prometheus metric export info: %s", formatAPIErrorMessage(err)))
} else {
// if this is a server error, we should retry
tflog.Error(ctx, "encountered a server error while refreshing Prometheus metric export status. Retrying...")
return retry.RetryableError(fmt.Errorf(
"encountered a server error while reading Prometheus metric export status - trying again"))
"encountered a server error while refreshing Prometheus metric export status",
))
}
}

Expand Down Expand Up @@ -306,7 +320,7 @@ func (r *metricExportPrometheusConfigResource) Delete(

clusterID := state.ID.ValueString()
cluster := &client.Cluster{}
retryFunc := func() retry.RetryFunc {
deletePromMetricExport := func() retry.RetryFunc {
return func() *retry.RetryError {
apiObj, httpResp, err := r.provider.service.DeletePrometheusMetricExport(ctx, clusterID)
if err != nil {
Expand Down Expand Up @@ -341,7 +355,7 @@ func (r *metricExportPrometheusConfigResource) Delete(

if apiObj.GetStatus() == client.METRICEXPORTSTATUSTYPE_DISABLING {
// We are passing empty PrometheusMetricExportInfo object as we don't need it in stage management.
err = retry.RetryContext(ctx, clusterUpdateTimeout,
err = retry.RetryContext(ctx, metricExportStableTimeout,
waitForPrometheusMetricExportStableFunc(ctx, clusterID, r.provider.service, client.NewPrometheusMetricExportInfoWithDefaults()))
if err != nil {
resp.Diagnostics.AddError(
Expand All @@ -355,7 +369,7 @@ func (r *metricExportPrometheusConfigResource) Delete(
}
}

err := retry.RetryContext(ctx, clusterUpdateTimeout, retryFunc())
err := retry.RetryContext(ctx, metricExportDisableTimeout, deletePromMetricExport())
if err != nil {
resp.Diagnostics.AddError(
"Error deleting Prometheus metric export config", err.Error(),
Expand Down
32 changes: 15 additions & 17 deletions internal/provider/metric_export_prometheus_config_resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,19 @@ package provider
import (
"context"
"fmt"
"net/http"
"os"
"testing"

"github.com/cockroachdb/cockroach-cloud-sdk-go/pkg/client"
mock_client "github.com/cockroachdb/terraform-provider-cockroach/mock"
"github.com/golang/mock/gomock"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-testing/helper/resource"
"github.com/hashicorp/terraform-plugin-testing/terraform"
"log"
"net/http"
"os"
"testing"
)

// TestAccMetricExportPrometheusConfigResource attempts to create, check, and destroy
// a real cluster. It will be skipped if TF_ACC isn't set.
func TestAccMetricExportPrometheusConfigResource(t *testing.T) {
t.Skip("Skipping until we can either integrate the AWS provider " +
"or import a permanent test fixture.")
t.Parallel()
clusterName := fmt.Sprintf("%s-prometheus-%s", tfTestPrefix, GenerateRandomString(4))
testMetricExportPrometheusConfigResource(t, clusterName, false)
Expand All @@ -45,7 +41,7 @@ func TestIntegrationMetricExportPrometheusConfigResource(t *testing.T) {
Name: clusterName,
CockroachVersion: "v22.2.0",
Plan: "DEDICATED",
CloudProvider: "AWS",
CloudProvider: "GCP",
State: "CREATED",
Config: client.ClusterConfig{
Dedicated: &client.DedicatedHardwareConfig{
Expand Down Expand Up @@ -149,7 +145,7 @@ func testMetricExportPrometheusConfigExists(
p := testAccProvider.(*provider)
p.service = NewService(cl)

rs, ok := s.RootModule().Resources[resourceName]
_, ok := s.RootModule().Resources[resourceName]
if !ok {
return fmt.Errorf("not found: %s", resourceName)
}
Expand All @@ -159,22 +155,24 @@ func testMetricExportPrometheusConfigExists(
}

clusterID := clusterRs.Primary.Attributes["id"]
log.Printf("[DEBUG] clusterID: %s, name %s", clusterRs.Primary.Attributes["id"], clusterRs.Primary.Attributes["name"])
config, _, err := p.service.GetPrometheusMetricExportInfo(context.TODO(), clusterID)
if err != nil {
return fmt.Errorf("metric export Prometheus config does not exist")
}

_, _, err := p.service.GetPrometheusMetricExportInfo(context.TODO(), clusterID)
if err == nil {
return nil
if config.GetStatus() != client.METRICEXPORTSTATUSTYPE_ENABLED {
return fmt.Errorf("metric export Prometheus config is not enabled")
}

return fmt.Errorf("metric export Prometheus config with site %s does not exist", rs.Primary.Attributes["site"])
return nil
}
}

func getTestMetricExportPrometheusConfigResourceCreateConfig(name string) string {
return fmt.Sprintf(`
resource "cockroach_cluster" "test" {
name = "%s"
cloud_provider = "AWS"
cloud_provider = "GCP"
dedicated = {
storage_gib = 35
num_virtual_cpus = 4
Expand All @@ -195,7 +193,7 @@ func getTestMetricExportPrometheusConfigResourceUpdateConfig(name string) string
return fmt.Sprintf(`
resource "cockroach_cluster" "test" {
name = "%s"
cloud_provider = "AWS"
cloud_provider = "GCP"
dedicated = {
storage_gib = 35
num_virtual_cpus = 4
Expand Down

This file was deleted.

This file was deleted.

1 change: 0 additions & 1 deletion vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ github.com/hashicorp/terraform-plugin-framework/provider/schema
github.com/hashicorp/terraform-plugin-framework/providerserver
github.com/hashicorp/terraform-plugin-framework/resource
github.com/hashicorp/terraform-plugin-framework/resource/schema
github.com/hashicorp/terraform-plugin-framework/resource/schema/booldefault
github.com/hashicorp/terraform-plugin-framework/resource/schema/boolplanmodifier
github.com/hashicorp/terraform-plugin-framework/resource/schema/defaults
github.com/hashicorp/terraform-plugin-framework/resource/schema/int64planmodifier
Expand Down

0 comments on commit 9e502ed

Please sign in to comment.