Skip to content

Commit

Permalink
Add retry for redistribution resource
Browse files Browse the repository at this point in the history
Since this resource does not have its own API endpoint, updates
are prone to concurrency errors. Retry here is temporary solution
until retry via sdk is implemented throughout the provider.

Signed-off-by: Anna Khmelnitsky <[email protected]>
  • Loading branch information
annakhm committed Nov 3, 2021
1 parent cf90a3f commit d7e7570
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 43 deletions.
36 changes: 36 additions & 0 deletions nsxt/policy_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/vmware/vsphere-automation-sdk-go/lib/vapi/std/errors"
"github.com/vmware/vsphere-automation-sdk-go/runtime/bindings"
"github.com/vmware/vsphere-automation-sdk-go/runtime/protocol/client"
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/infra/realized_state"
Expand Down Expand Up @@ -275,3 +276,38 @@ func convertModelBindingType(obj interface{}, sourceType bindings.BindingType, d

return gmObj, nil
}

func isTransientAPIError(err error) bool {
if _, ok := err.(errors.InvalidRequest); ok {
return true
}
if _, ok := err.(errors.ConcurrentChange); ok {
return true
}

return false
}

func retryUponTransientAPIError(operation func() error) error {
// NOTE: this is temporary solution until global retry mechanism is introduced
// via SDK decorator.
// Delete operation on platform is asyncrounous, meaning that even correct order of
// deletion can cause dependency issues that require retry from client side.
maxRetryAttempts := 3
var err error
for i := 0; i <= maxRetryAttempts; i++ {
err = operation()
if err == nil {
return nil
}

if !isTransientAPIError(err) {
// other type of error - probably legit
return err
}

log.Printf("[INFO] Retrying operation due to potenitially transient error `%s`, attempt %d", err, i+1)
}

return err
}
51 changes: 31 additions & 20 deletions nsxt/resource_nsxt_policy_gateway_redistribution_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,23 @@ func policyGatewayRedistributionConfigPatch(d *schema.ResourceData, m interface{
RouteRedistributionConfig: &redistributionStruct,
}

if isPolicyGlobalManager(m) {
// Use patch to only update the relevant fields
rawObj, err := convertModelBindingType(serviceStruct, model.LocaleServicesBindingType(), gm_model.LocaleServicesBindingType())
if err != nil {
return err
}
client := gm_tier0s.NewDefaultLocaleServicesClient(connector)
return client.Patch(gwID, localeServiceID, rawObj.(gm_model.LocaleServices))
doPatch := func() error {
if isPolicyGlobalManager(m) {
// Use patch to only update the relevant fields
rawObj, errConv := convertModelBindingType(serviceStruct, model.LocaleServicesBindingType(), gm_model.LocaleServicesBindingType())
if errConv != nil {
return errConv
}
client := gm_tier0s.NewDefaultLocaleServicesClient(connector)
return client.Patch(gwID, localeServiceID, rawObj.(gm_model.LocaleServices))

}
client := tier_0s.NewDefaultLocaleServicesClient(connector)
return client.Patch(gwID, localeServiceID, serviceStruct)
}
client := tier_0s.NewDefaultLocaleServicesClient(connector)
return client.Patch(gwID, localeServiceID, serviceStruct)
// since redistribution config is not a separate API endpoint, but sub-clause of Tier0,
// concurrency issues may arise that require retry from client side.
return retryUponTransientAPIError(doPatch)
}

func resourceNsxtPolicyGatewayRedistributionConfigCreate(d *schema.ResourceData, m interface{}) error {
Expand Down Expand Up @@ -223,25 +228,31 @@ func resourceNsxtPolicyGatewayRedistributionConfigDelete(d *schema.ResourceData,
return fmt.Errorf("Error obtaining Tier0 Gateway id or Locale Service id")
}

// Update the locale service with empty HaVipConfigs using get/post
var err error
if isPolicyGlobalManager(m) {
client := gm_tier0s.NewDefaultLocaleServicesClient(connector)
gmObj, err1 := client.Get(gwID, localeServiceID)
if err1 != nil {
return handleDeleteError("Tier0 Redistribution config", id, err)
// Update the locale service with empty Redistribution config using get/post
doUpdate := func() error {
var err error
if isPolicyGlobalManager(m) {
client := gm_tier0s.NewDefaultLocaleServicesClient(connector)
gmObj, err1 := client.Get(gwID, localeServiceID)
if err1 != nil {
return handleDeleteError("Tier0 Redistribution config", id, err)
}
gmObj.RouteRedistributionConfig = nil
_, err = client.Update(gwID, localeServiceID, gmObj)
return err
}
gmObj.RouteRedistributionConfig = nil
_, err = client.Update(gwID, localeServiceID, gmObj)
} else {
client := tier_0s.NewDefaultLocaleServicesClient(connector)
obj, err1 := client.Get(gwID, localeServiceID)
if err1 != nil {
return handleDeleteError("Tier0 Redistribution config", id, err)
}
obj.RouteRedistributionConfig = nil
_, err = client.Update(gwID, localeServiceID, obj)
return err

}

err := retryUponTransientAPIError(doUpdate)
if err != nil {
return handleDeleteError("Tier0 RedistributionConfig config", id, err)
}
Expand Down
28 changes: 5 additions & 23 deletions nsxt/resource_nsxt_policy_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (

"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
"github.com/vmware/vsphere-automation-sdk-go/lib/vapi/std/errors"
"github.com/vmware/vsphere-automation-sdk-go/runtime/bindings"
"github.com/vmware/vsphere-automation-sdk-go/runtime/data"
"github.com/vmware/vsphere-automation-sdk-go/runtime/protocol/client"
Expand Down Expand Up @@ -929,36 +928,19 @@ func resourceNsxtPolicyGroupDelete(d *schema.ResourceData, m interface{}) error
}

connector := getPolicyConnector(m)
var err error
forceDelete := true
failIfSubtreeExists := false

// NOTE: this is temporary solution until global retry mechanism is introduced
// via SDK decorator.
// Delete operation on platform is asyncrounous, meaning that even correct order of
// deletion can cause dependency issues that require retry from client side.
maxRetryAttempts := 2
for i := 0; i <= maxRetryAttempts; i++ {
doDelete := func() error {
if isPolicyGlobalManager(m) {
client := gm_domains.NewDefaultGroupsClient(connector)
err = client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete)
} else {
client := domains.NewDefaultGroupsClient(connector)
err = client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete)
}

if err != nil {
if _, ok := err.(errors.InvalidRequest); ok {
log.Printf("[INFO] Retrying group deletion due to potenitially transient error `%s`, attempt %d", err, i+1)
} else {
// other type of error - probably legit
break
}
} else {
break
return client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete)
}
client := domains.NewDefaultGroupsClient(connector)
return client.Delete(d.Get("domain").(string), id, &failIfSubtreeExists, &forceDelete)
}

err := retryUponTransientAPIError(doDelete)
if err != nil {
return handleDeleteError("Group", id, err)
}
Expand Down

0 comments on commit d7e7570

Please sign in to comment.