Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HCPF-1961: Implement retry mechanism for Get and Set Policy #870

Merged
merged 13 commits into from
Jul 2, 2024
Merged
3 changes: 3 additions & 0 deletions .changelog/870.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
Fix intermittent conflicts during IAM policy updates
```
6 changes: 4 additions & 2 deletions internal/clients/iampolicy/iam.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ import (
// Implementations should be created per resource and should keep track of the
// resource identifier.
type ResourceIamUpdater interface {
// Fetch the existing IAM policy attached to a resource.
// GetResourceIamPolicy fetches the existing IAM policy attached to a resource.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for cleaning up the comments ✨

GetResourceIamPolicy(context.Context) (*models.HashicorpCloudResourcemanagerPolicy, diag.Diagnostics)

// Replaces the existing IAM Policy attached to a resource.
// SetResourceIamPolicy replaces the existing IAM Policy attached to a resource.
// If an error occurs, a new custom ErrorHTTPStatusCode should be appended to the diagnostics.
SetResourceIamPolicy(ctx context.Context, policy *models.HashicorpCloudResourcemanagerPolicy) (*models.HashicorpCloudResourcemanagerPolicy, diag.Diagnostics)

// GetMutexKey gets the mutex key.
// A mutex guards against concurrent to call to the SetResourceIamPolicy method.
// The mutex key should be globally unique.
GetMutexKey() string
Expand Down
91 changes: 60 additions & 31 deletions internal/clients/iampolicy/iam_binding_batcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ package iampolicy

import (
"context"
"log"
"sync"
"time"

iamModels "github.com/hashicorp/hcp-sdk-go/clients/cloud-iam/stable/2019-12-10/models"
"github.com/hashicorp/hcp-sdk-go/clients/cloud-resource-manager/stable/2019-12-10/models"
"github.com/hashicorp/terraform-plugin-framework/diag"
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
"golang.org/x/exp/maps"
)

Expand Down Expand Up @@ -184,45 +186,72 @@ func (f *policyFuture) executeModifers(ctx context.Context, u ResourceIamUpdater
return
}

// Get the existing policy
ep, diags := u.GetResourceIamPolicy(ctx)
if diags.HasError() {
f.set(nil, diags)
return
}
backoff := time.Second
maxBackoff := 30 * time.Second

// Determine the principal's we need to lookup their type.
principalSet, diags := f.getPrincipals(ctx, client)
if diags.HasError() {
f.set(nil, diags)
return
}
for {
// Get the existing policy
ep, diags := u.GetResourceIamPolicy(ctx)
if diags.HasError() {
f.set(nil, diags)
return
}

// Remove any bindings needed
bindings := ToMap(ep)
for _, rm := range f.removers {
if members, ok := bindings[rm.RoleID]; ok {
delete(members, rm.Members[0].MemberID)
if len(members) == 0 {
delete(bindings, rm.RoleID)
}
// Determine the principal's we need to lookup their type.
principalSet, diags := f.getPrincipals(ctx, client)
if diags.HasError() {
f.set(nil, diags)
return
}
}

// Go through the setters and apply them
for _, s := range f.setters {
members, ok := bindings[s.RoleID]
if !ok {
members = make(map[string]*models.HashicorpCloudResourcemanagerPolicyBindingMemberType, 4)
bindings[s.RoleID] = members
// Remove any bindings needed
bindings := ToMap(ep)
for _, rm := range f.removers {
if members, ok := bindings[rm.RoleID]; ok {
delete(members, rm.Members[0].MemberID)
if len(members) == 0 {
delete(bindings, rm.RoleID)
}
}
}

members[s.Members[0].MemberID] = principalSet[s.Members[0].MemberID]
}
// Go through the setters and apply them
for _, s := range f.setters {
members, ok := bindings[s.RoleID]
if !ok {
members = make(map[string]*models.HashicorpCloudResourcemanagerPolicyBindingMemberType, 4)
bindings[s.RoleID] = members
}

// Apply the policy
f.set(u.SetResourceIamPolicy(ctx, FromMap(ep.Etag, bindings)))
members[s.Members[0].MemberID] = principalSet[s.Members[0].MemberID]
}

// Apply the policy
ep, diags = u.SetResourceIamPolicy(ctx, FromMap(ep.Etag, bindings))
if diags.HasError() {
if customdiags.HasConflictError(diags) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment here explaining that the policy object has changed and the etag is no longer valid and how the retry works.

// Policy object has changed since it was last gotten and the etag is now different.
// Continuously retry getting and setting the policy with an increasing backoff period until the maximum backoff period is reached.
if backoff > maxBackoff {
log.Printf("[DEBUG]: Maximum backoff time reached. Aborting operation.")
f.set(nil, diags)
return
}
log.Printf("[DEBUG]: Operation failed due to conflicts. Operation will be restarted after %s", backoff)
// Pause the execution for the duration specified by the current backoff time.
time.Sleep(backoff)
// Double the backoff time to increase the delay for the next retry.
backoff *= 2
continue
}
f.set(nil, diags)
return
}

// Successfully applied policy
f.set(ep, nil)
return
}
}

// getPrincipals returns a map of principal_id to binding type. The binding type
Expand Down
63 changes: 63 additions & 0 deletions internal/customdiags/error_http_status_code.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package customdiags

import (
"net/http"

"github.com/hashicorp/terraform-plugin-framework/diag"
)

// ErrorHTTPStatusCode is an error diagnostic that stored the error code.
type ErrorHTTPStatusCode struct {
detail string
summary string
HTTPStatusCode int
}

// Detail returns the diagnostic detail.
func (d ErrorHTTPStatusCode) Detail() string {
return d.detail
}

// Equal returns true if the other diagnostic is equivalent.
func (d ErrorHTTPStatusCode) Equal(o diag.Diagnostic) bool {
ed, ok := o.(ErrorHTTPStatusCode)

if !ok {
return false
}

return ed.Summary() == d.Summary() && ed.Detail() == d.Detail() && ed.HTTPStatusCode == d.HTTPStatusCode
}

// Summary returns the diagnostic summary.
func (d ErrorHTTPStatusCode) Summary() string {
return d.summary
}

// Severity returns the diagnostic severity.
func (d ErrorHTTPStatusCode) Severity() diag.Severity {
return diag.SeverityError
}

// NewErrorHTTPStatusCode returns a new error severity diagnostic with the given summary, detail and error code.
func NewErrorHTTPStatusCode(summary string, detail string, statusCode int) ErrorHTTPStatusCode {
return ErrorHTTPStatusCode{
detail: detail,
summary: summary,
HTTPStatusCode: statusCode,
}
}

// HasConflictError checks if a diagnostic has a specific error code.
func HasConflictError(diags diag.Diagnostics) bool {
for _, d := range diags {
diag, ok := d.(*ErrorHTTPStatusCode)
if !ok {
return false
}
if diag.HTTPStatusCode == http.StatusConflict {
return true
}
}
return false
}
11 changes: 8 additions & 3 deletions internal/provider/iam/resource_group_iam_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/hashicorp/terraform-plugin-framework/types"
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
"github.com/hashicorp/terraform-provider-hcp/internal/provider/iam/helper"
)

Expand Down Expand Up @@ -100,8 +101,7 @@ func (u *groupIAMPolicyUpdater) GetResourceIamPolicy(ctx context.Context) (*mode
// Groups do not have a policy by default
return &models.HashicorpCloudResourcemanagerPolicy{}, diags
}

diags.AddError("failed to retrieve group IAM policy", err.Error())
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve group IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand All @@ -119,7 +119,12 @@ func (u *groupIAMPolicyUpdater) SetResourceIamPolicy(ctx context.Context, policy

res, err := u.client.ResourceService.ResourceServiceSetIamPolicy(params, nil)
if err != nil {
diags.AddError("failed to set group IAM policy", err.Error())
serviceErr, ok := err.(*resource_service.ResourceServiceSetIamPolicyDefault)
if !ok {
diags.AddError("failed to cast resource IAM policy error", err.Error())
return nil, diags
}
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to set group IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/hashicorp/terraform-plugin-framework/resource/schema"
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
)

// orgIAMSchema is the schema for the organization IAM resources
Expand Down Expand Up @@ -64,7 +65,12 @@ func (u *orgIAMPolicyUpdater) GetResourceIamPolicy(ctx context.Context) (*models
params.ID = u.client.Config.OrganizationID
res, err := u.client.Organization.OrganizationServiceGetIamPolicy(params, nil)
if err != nil {
diags.AddError("failed to retrieve organization IAM policy", err.Error())
serviceErr, ok := err.(*organization_service.OrganizationServiceGetIamPolicyDefault)
if !ok {
diags.AddError("failed to cast organization IAM policy error", err.Error())
return nil, diags
}
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve organization IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand All @@ -82,7 +88,12 @@ func (u *orgIAMPolicyUpdater) SetResourceIamPolicy(ctx context.Context, policy *

res, err := u.client.Organization.OrganizationServiceSetIamPolicy(params, nil)
if err != nil {
diags.AddError("failed to retrieve organization IAM policy", err.Error())
serviceErr, ok := err.(*organization_service.OrganizationServiceSetIamPolicyDefault)
if !ok {
diags.AddError("failed to cast organization IAM policy error", err.Error())
return nil, diags
}
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to update organization IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/hashicorp/terraform-plugin-framework/types"
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
)

// projectIAMSchema is the schema for the project IAM resources
Expand Down Expand Up @@ -86,7 +87,12 @@ func (u *projectIAMPolicyUpdater) GetResourceIamPolicy(ctx context.Context) (*mo
params.ID = u.projectID
res, err := u.client.Project.ProjectServiceGetIamPolicy(params, nil)
if err != nil {
diags.AddError("failed to retrieve project IAM policy", err.Error())
serviceErr, ok := err.(*project_service.ProjectServiceGetIamPolicyDefault)
if !ok {
diags.AddError("failed to cast project IAM policy error", err.Error())
return nil, diags
}
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve project IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand All @@ -104,7 +110,12 @@ func (u *projectIAMPolicyUpdater) SetResourceIamPolicy(ctx context.Context, poli

res, err := u.client.Project.ProjectServiceSetIamPolicy(params, nil)
if err != nil {
diags.AddError("failed to retrieve project IAM policy", err.Error())
serviceErr, ok := err.(*project_service.ProjectServiceSetIamPolicyDefault)
if !ok {
diags.AddError("failed to cast project IAM policy error", err.Error())
return nil, diags
}
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to update project IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/hashicorp/terraform-plugin-framework/types"
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
)

// vaultSecretsAppIAMSchema is the schema for the vault secret resource IAM resources
Expand Down Expand Up @@ -92,7 +93,7 @@ func (u *vaultSecretsAppResourceIAMPolicyUpdater) GetResourceIamPolicy(ctx conte
if serviceErr.Code() == http.StatusNotFound {
return &models.HashicorpCloudResourcemanagerPolicy{}, diags
}
diags.AddError("failed to retrieve resource IAM policy", err.Error())
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve resource IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand All @@ -111,7 +112,12 @@ func (u *vaultSecretsAppResourceIAMPolicyUpdater) SetResourceIamPolicy(ctx conte

res, err := u.client.ResourceService.ResourceServiceSetIamPolicy(params, nil)
if err != nil {
diags.AddError("failed to retrieve resource IAM policy", err.Error())
serviceErr, ok := err.(*resource_service.ResourceServiceSetIamPolicyDefault)
if !ok {
diags.AddError("failed to cast resource IAM policy error", err.Error())
return nil, diags
}
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to update resource IAM policy", err.Error(), serviceErr.Code()))
return nil, diags
}

Expand Down