diff --git a/nsxt/policy_utils.go b/nsxt/policy_utils.go index 7f901542a..aafc91774 100644 --- a/nsxt/policy_utils.go +++ b/nsxt/policy_utils.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + "github.com/vmware/vsphere-automation-sdk-go/lib/vapi/std/errors" "github.com/vmware/vsphere-automation-sdk-go/runtime/bindings" "github.com/vmware/vsphere-automation-sdk-go/runtime/protocol/client" "github.com/vmware/vsphere-automation-sdk-go/services/nsxt/infra/realized_state" @@ -275,3 +276,38 @@ func convertModelBindingType(obj interface{}, sourceType bindings.BindingType, d return gmObj, nil } + +func isTransientAPIError(err error) bool { + if _, ok := err.(errors.InvalidRequest); ok { + return true + } + if _, ok := err.(errors.ConcurrentChange); ok { + return true + } + + return false +} + +func retryUponTransientAPIError(operation func() error) error { + // NOTE: this is temporary solution until global retry mechanism is introduced + // via SDK decorator. + // Delete operation on platform is asyncrounous, meaning that even correct order of + // deletion can cause dependency issues that require retry from client side. + maxRetryAttempts := 3 + var err error + for i := 0; i <= maxRetryAttempts; i++ { + err = operation() + if err == nil { + return nil + } + + if !isTransientAPIError(err) { + // other type of error - probably legit + return err + } + + log.Printf("[INFO] Retrying operation due to potenitially transient error `%s`, attempt %d", err, i+1) + } + + return err +} diff --git a/nsxt/resource_nsxt_policy_gateway_redistribution_config.go b/nsxt/resource_nsxt_policy_gateway_redistribution_config.go index 147eecdeb..f5b579364 100644 --- a/nsxt/resource_nsxt_policy_gateway_redistribution_config.go +++ b/nsxt/resource_nsxt_policy_gateway_redistribution_config.go @@ -85,18 +85,23 @@ func policyGatewayRedistributionConfigPatch(d *schema.ResourceData, m interface{ RouteRedistributionConfig: &redistributionStruct, } - if isPolicyGlobalManager(m) { - // Use patch to only update the relevant fields - rawObj, err := convertModelBindingType(serviceStruct, model.LocaleServicesBindingType(), gm_model.LocaleServicesBindingType()) - if err != nil { - return err - } - client := gm_tier0s.NewDefaultLocaleServicesClient(connector) - return client.Patch(gwID, localeServiceID, rawObj.(gm_model.LocaleServices)) + doPatch := func() error { + if isPolicyGlobalManager(m) { + // Use patch to only update the relevant fields + rawObj, errConv := convertModelBindingType(serviceStruct, model.LocaleServicesBindingType(), gm_model.LocaleServicesBindingType()) + if errConv != nil { + return errConv + } + client := gm_tier0s.NewDefaultLocaleServicesClient(connector) + return client.Patch(gwID, localeServiceID, rawObj.(gm_model.LocaleServices)) + } + client := tier_0s.NewDefaultLocaleServicesClient(connector) + return client.Patch(gwID, localeServiceID, serviceStruct) } - client := tier_0s.NewDefaultLocaleServicesClient(connector) - return client.Patch(gwID, localeServiceID, serviceStruct) + // since redistribution config is not a separate API endpoint, but sub-clause of Tier0, + // concurrency issues may arise that require retry from client side. + return retryUponTransientAPIError(doPatch) } func resourceNsxtPolicyGatewayRedistributionConfigCreate(d *schema.ResourceData, m interface{}) error { @@ -223,17 +228,19 @@ func resourceNsxtPolicyGatewayRedistributionConfigDelete(d *schema.ResourceData, return fmt.Errorf("Error obtaining Tier0 Gateway id or Locale Service id") } - // Update the locale service with empty HaVipConfigs using get/post - var err error - if isPolicyGlobalManager(m) { - client := gm_tier0s.NewDefaultLocaleServicesClient(connector) - gmObj, err1 := client.Get(gwID, localeServiceID) - if err1 != nil { - return handleDeleteError("Tier0 Redistribution config", id, err) + // Update the locale service with empty Redistribution config using get/post + doUpdate := func() error { + var err error + if isPolicyGlobalManager(m) { + client := gm_tier0s.NewDefaultLocaleServicesClient(connector) + gmObj, err1 := client.Get(gwID, localeServiceID) + if err1 != nil { + return handleDeleteError("Tier0 Redistribution config", id, err) + } + gmObj.RouteRedistributionConfig = nil + _, err = client.Update(gwID, localeServiceID, gmObj) + return err } - gmObj.RouteRedistributionConfig = nil - _, err = client.Update(gwID, localeServiceID, gmObj) - } else { client := tier_0s.NewDefaultLocaleServicesClient(connector) obj, err1 := client.Get(gwID, localeServiceID) if err1 != nil { @@ -241,7 +248,11 @@ func resourceNsxtPolicyGatewayRedistributionConfigDelete(d *schema.ResourceData, } obj.RouteRedistributionConfig = nil _, err = client.Update(gwID, localeServiceID, obj) + return err + } + + err := retryUponTransientAPIError(doUpdate) if err != nil { return handleDeleteError("Tier0 RedistributionConfig config", id, err) } diff --git a/nsxt/resource_nsxt_policy_group.go b/nsxt/resource_nsxt_policy_group.go index a0069ce39..8522ecbf4 100644 --- a/nsxt/resource_nsxt_policy_group.go +++ b/nsxt/resource_nsxt_policy_group.go @@ -10,7 +10,6 @@ import ( "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation" - "github.com/vmware/vsphere-automation-sdk-go/lib/vapi/std/errors" "github.com/vmware/vsphere-automation-sdk-go/runtime/bindings" "github.com/vmware/vsphere-automation-sdk-go/runtime/data" "github.com/vmware/vsphere-automation-sdk-go/runtime/protocol/client" @@ -929,36 +928,19 @@ func resourceNsxtPolicyGroupDelete(d *schema.ResourceData, m interface{}) error } connector := getPolicyConnector(m) - var err error forceDelete := true failIfSubtreeExists := false - // NOTE: this is temporary solution until global retry mechanism is introduced - // via SDK decorator. - // Delete operation on platform is asyncrounous, meaning that even correct order of - // deletion can cause dependency issues that require retry from client side. - maxRetryAttempts := 2 - for i := 0; i <= maxRetryAttempts; i++ { + doDelete := func() error { if isPolicyGlobalManager(m) { client := gm_domains.NewDefaultGroupsClient(connector) - err = client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete) - } else { - client := domains.NewDefaultGroupsClient(connector) - err = client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete) - } - - if err != nil { - if _, ok := err.(errors.InvalidRequest); ok { - log.Printf("[INFO] Retrying group deletion due to potenitially transient error `%s`, attempt %d", err, i+1) - } else { - // other type of error - probably legit - break - } - } else { - break + return client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete) } + client := domains.NewDefaultGroupsClient(connector) + return client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete) } + err := retryUponTransientAPIError(doDelete) if err != nil { return handleDeleteError("Group", id, err) }