Skip to content

Commit

Permalink
fix: successfully delete instances and bindings with invalid state
Browse files Browse the repository at this point in the history
As part of the upgrade epic, we added checks to make sure that service
instances and bindings had been updated to the latest version. This
involves parsing the Terraform state and extracting the version. But
sometimes the Terraform state is not valid due to being empty, or being
invalid JSON. Typically this is after a failure to provision/bind,
and the service instance or binding is in a failed state. In these
situations, the lack of state data means CSB does not have enough data to
successfully clean up using Terraform. We have had feedback that this
can be problematic for users because they have to do a manual purge of
the service instance, and it would be easier if the delete operations
would just work. This is a tradeoff because a manual purge could have
the effect of triggering someone to look at the backing IaaS to look for
resource leakage. By making this just work, we might make resource
leakage worse.

[#185835561](https://www.pivotaltracker.com/story/show/185835561)
  • Loading branch information
blgm committed Feb 15, 2024
1 parent 6ee2551 commit 5bb3ca7
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 45 deletions.
47 changes: 35 additions & 12 deletions brokerapi/broker/deprovision.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@ package broker

import (
"context"
"errors"
"fmt"

"code.cloudfoundry.org/lager/v3"
"github.com/pivotal-cf/brokerapi/v10/domain"
"github.com/pivotal-cf/brokerapi/v10/domain/apiresponses"

"github.com/cloudfoundry/cloud-service-broker/dbservice/models"
"github.com/cloudfoundry/cloud-service-broker/internal/paramparser"
"github.com/cloudfoundry/cloud-service-broker/pkg/broker"
"github.com/cloudfoundry/cloud-service-broker/pkg/providers/tf/workspace"
"github.com/cloudfoundry/cloud-service-broker/utils/correlation"
"github.com/cloudfoundry/cloud-service-broker/utils/request"
"github.com/pivotal-cf/brokerapi/v10/domain"
"github.com/pivotal-cf/brokerapi/v10/domain/apiresponses"
)

// Deprovision destroys an existing instance of a service.
Expand Down Expand Up @@ -53,7 +55,18 @@ func (broker *ServiceBroker) Deprovision(ctx context.Context, instanceID string,
return domain.DeprovisionServiceSpec{}, err
}

if err := serviceProvider.CheckUpgradeAvailable(deploymentID); err != nil {
err = serviceProvider.CheckUpgradeAvailable(deploymentID)
switch {
case errors.As(err, &workspace.CannotReadVersionError{}):
// In the special case of not being able to read the version during deprovision, we succeed immediately because:
// - we have had feedback that failing here creates unwanted manual work for CF admins
// - the Terraform state is empty or invalid, so it would be impossible for CSB to do a successful cleanup
broker.Logger.Info("deprovision-cannot-read-version")
if err := broker.removeServiceInstanceData(ctx, instanceID, serviceProvider); err != nil {
broker.Logger.Error("deprovision-cleanup", err)
}
return domain.DeprovisionServiceSpec{}, nil
case err != nil:
return domain.DeprovisionServiceSpec{}, fmt.Errorf("failed to delete: %s", err.Error())
}

Expand Down Expand Up @@ -87,15 +100,11 @@ func (broker *ServiceBroker) Deprovision(ctx context.Context, instanceID string,
}

if operationID == nil {
// soft-delete instance details from the db if this is a synchronous operation
// if it's an async operation we can't delete from the db until we're sure delete succeeded, so this is
// handled internally to LastOperation
if err := broker.store.DeleteServiceInstanceDetails(instanceID); err != nil {
return domain.DeprovisionServiceSpec{}, fmt.Errorf("error deleting instance details from database: %s. WARNING: this instance will remain visible in cf. Contact your operator for cleanup", err)
}
if err := broker.store.DeleteProvisionRequestDetails(instanceID); err != nil {
return domain.DeprovisionServiceSpec{}, fmt.Errorf("error deleting provision request details from the database: %w", err)
// If this is a synchronous operation, then immediately remove the service instance data from the database
if err := broker.removeServiceInstanceData(ctx, instanceID, serviceProvider); err != nil {
return domain.DeprovisionServiceSpec{}, err
}

return domain.DeprovisionServiceSpec{}, nil
}

Expand All @@ -111,3 +120,17 @@ func (broker *ServiceBroker) Deprovision(ctx context.Context, instanceID string,
}
return response, nil
}

func (broker *ServiceBroker) removeServiceInstanceData(ctx context.Context, instanceID string, serviceProvider broker.ServiceProvider) error {
if err := broker.store.DeleteServiceInstanceDetails(instanceID); err != nil {
return fmt.Errorf("error deleting instance details from database: %s. WARNING: this instance will remain visible in cf. Contact your operator for cleanup", err)
}
if err := broker.store.DeleteProvisionRequestDetails(instanceID); err != nil {
return fmt.Errorf("error deleting provision request details from the database: %w", err)
}
if err := serviceProvider.DeleteInstanceData(ctx, instanceID); err != nil {
return fmt.Errorf("error deleting provider instance data from database: %s", err)
}

return nil
}
29 changes: 29 additions & 0 deletions brokerapi/broker/deprovision_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/cloudfoundry/cloud-service-broker/internal/storage"
pkgBroker "github.com/cloudfoundry/cloud-service-broker/pkg/broker"
pkgBrokerFakes "github.com/cloudfoundry/cloud-service-broker/pkg/broker/brokerfakes"
"github.com/cloudfoundry/cloud-service-broker/pkg/providers/tf/workspace"
"github.com/cloudfoundry/cloud-service-broker/pkg/varcontext"
"github.com/cloudfoundry/cloud-service-broker/utils"
. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -125,6 +126,34 @@ var _ = Describe("Deprovision", func() {
Expect(actualInstance).To(Equal(instanceToDeleteID))
})

When("error reading instance version", func() {
It("should succeed anyway", func() {
deprovisionDetails = domain.DeprovisionDetails{
ServiceID: offeringID,
PlanID: "some-non-existent-plan",
}
fakeServiceProvider.CheckUpgradeAvailableReturns(workspace.CannotReadVersionError{})

_, err := serviceBroker.Deprovision(context.TODO(), instanceToDeleteID, deprovisionDetails, true)
Expect(err).NotTo(HaveOccurred())

By("validating SI details delete call")
Expect(fakeStorage.DeleteServiceInstanceDetailsCallCount()).To(Equal(1))
actualInstanceID := fakeStorage.DeleteServiceInstanceDetailsArgsForCall(0)
Expect(actualInstanceID).To(Equal(instanceToDeleteID))

By("validating provision parameters delete call")
Expect(fakeStorage.DeleteProvisionRequestDetailsCallCount()).To(Equal(1))
actualInstance := fakeStorage.DeleteProvisionRequestDetailsArgsForCall(0)
Expect(actualInstance).To(Equal(instanceToDeleteID))

By("validating the instance workspace delete call")
Expect(fakeServiceProvider.DeleteInstanceDataCallCount()).To(Equal(1))
_, actualInstanceID = fakeServiceProvider.DeleteInstanceDataArgsForCall(0)
Expect(actualInstanceID).To(Equal(instanceToDeleteID))
})
})

Describe("deprovision variables", func() {
When("there were provision variables during provision or update", func() {
BeforeEach(func() {
Expand Down
37 changes: 29 additions & 8 deletions brokerapi/broker/unbind.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@ package broker

import (
"context"
"errors"
"fmt"

"code.cloudfoundry.org/lager/v3"
"github.com/pivotal-cf/brokerapi/v10/domain"
"github.com/pivotal-cf/brokerapi/v10/domain/apiresponses"

"github.com/cloudfoundry/cloud-service-broker/internal/paramparser"
"github.com/cloudfoundry/cloud-service-broker/pkg/broker"
"github.com/cloudfoundry/cloud-service-broker/pkg/providers/tf/workspace"
"github.com/cloudfoundry/cloud-service-broker/utils/correlation"
"github.com/cloudfoundry/cloud-service-broker/utils/request"
"github.com/pivotal-cf/brokerapi/v10/domain"
"github.com/pivotal-cf/brokerapi/v10/domain/apiresponses"
)

// Unbind destroys an account and credentials with access to an instance of a service.
Expand Down Expand Up @@ -45,7 +47,17 @@ func (broker *ServiceBroker) Unbind(ctx context.Context, instanceID, bindingID s
}

err = serviceProvider.CheckUpgradeAvailable(generateTFBindingID(instanceID, bindingID))
if err != nil {
switch {
case errors.As(err, &workspace.CannotReadVersionError{}):
// In the special case of not being able to read the version during unbind, we succeed immediately because:
// - we have had feedback that failing here creates unwanted manual work for CF admins
// - the Terraform state is empty or invalid, so it would be impossible for CSB to do a successful cleanup
broker.Logger.Info("unbind-cannot-read-version", lager.Data{"error": err})
if err := broker.removeBindingData(ctx, instanceID, bindingID, serviceDefinition, serviceProvider); err != nil {
broker.Logger.Error("unbind-cleanup", err)
}
return domain.UnbindSpec{}, nil
case err != nil:
return domain.UnbindSpec{}, fmt.Errorf("failed to unbind: %s", err.Error())
}

Expand Down Expand Up @@ -75,6 +87,14 @@ func (broker *ServiceBroker) Unbind(ctx context.Context, instanceID, bindingID s
return domain.UnbindSpec{}, err
}

if err := broker.removeBindingData(ctx, instanceID, bindingID, serviceDefinition, serviceProvider); err != nil {
return domain.UnbindSpec{}, err
}

return domain.UnbindSpec{}, nil
}

func (broker *ServiceBroker) removeBindingData(ctx context.Context, instanceID, bindingID string, serviceDefinition *broker.ServiceDefinition, serviceProvider broker.ServiceProvider) error {
if broker.Credstore != nil {
credentialName := getCredentialName(broker.getServiceName(serviceDefinition), bindingID)

Expand All @@ -89,15 +109,16 @@ func (broker *ServiceBroker) Unbind(ctx context.Context, instanceID, bindingID s

// remove binding from database
if err := broker.store.DeleteServiceBindingCredentials(bindingID, instanceID); err != nil {
return domain.UnbindSpec{}, fmt.Errorf("error soft-deleting credentials from database: %s. WARNING: these credentials will remain visible in cf. Contact your operator for cleanup", err)
return fmt.Errorf("error soft-deleting credentials from database: %s. WARNING: these credentials will remain visible in cf. Contact your operator for cleanup", err)
}
if err := broker.store.DeleteBindRequestDetails(bindingID, instanceID); err != nil {
return domain.UnbindSpec{}, fmt.Errorf("error soft-deleting bind request details from database: %s", err)
return fmt.Errorf("error soft-deleting bind request details from database: %s", err)
}

// delete the Terraform workspace from the database
if err := serviceProvider.DeleteBindingData(ctx, instanceID, bindingID); err != nil {
return domain.UnbindSpec{}, fmt.Errorf("error deleting provider binding data from database: %s", err)
return fmt.Errorf("error deleting provider binding data from database: %s", err)
}

return domain.UnbindSpec{}, nil
return nil
}
43 changes: 37 additions & 6 deletions brokerapi/broker/unbind_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@ import (
"fmt"

"code.cloudfoundry.org/lager/v3"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/pivotal-cf/brokerapi/v10/domain"
"github.com/pivotal-cf/brokerapi/v10/domain/apiresponses"
"github.com/pivotal-cf/brokerapi/v10/middlewares"

"github.com/cloudfoundry/cloud-service-broker/brokerapi/broker"
"github.com/cloudfoundry/cloud-service-broker/brokerapi/broker/brokerfakes"
"github.com/cloudfoundry/cloud-service-broker/dbservice/models"
"github.com/cloudfoundry/cloud-service-broker/internal/storage"
pkgBroker "github.com/cloudfoundry/cloud-service-broker/pkg/broker"
pkgBrokerFakes "github.com/cloudfoundry/cloud-service-broker/pkg/broker/brokerfakes"
"github.com/cloudfoundry/cloud-service-broker/pkg/credstore/credstorefakes"
"github.com/cloudfoundry/cloud-service-broker/pkg/providers/tf/workspace"
"github.com/cloudfoundry/cloud-service-broker/pkg/varcontext"
"github.com/cloudfoundry/cloud-service-broker/utils"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/pivotal-cf/brokerapi/v10/domain"
"github.com/pivotal-cf/brokerapi/v10/domain/apiresponses"
"github.com/pivotal-cf/brokerapi/v10/middlewares"
)

var _ = Describe("Unbind", func() {
Expand Down Expand Up @@ -209,6 +209,37 @@ var _ = Describe("Unbind", func() {
})
})
})

When("cannot read terraform state version", func() {
It("should succeed anyway", func() {
fakeServiceProvider.CheckUpgradeAvailableReturns(workspace.CannotReadVersionError{})

_, err := serviceBroker.Unbind(context.TODO(), instanceID, bindingID, unbindDetails, false)
Expect(err).NotTo(HaveOccurred())

By("validating credstore delete has been called")
Expect(fakeCredStore.DeletePermissionCallCount()).To(Equal(1))
Expect(fakeCredStore.DeleteCallCount()).To(Equal(1))

By("validating storage is asked to delete binding credentials")
Expect(fakeStorage.DeleteServiceBindingCredentialsCallCount()).To(Equal(1))
actualBindingID, actualInstanceID := fakeStorage.DeleteServiceBindingCredentialsArgsForCall(0)
Expect(actualBindingID).To(Equal(bindingID))
Expect(actualInstanceID).To(Equal(instanceID))

By("validating storage is asked to delete binding request details")
Expect(fakeStorage.DeleteBindRequestDetailsCallCount()).To(Equal(1))
actualBindingID, actualInstanceID = fakeStorage.DeleteBindRequestDetailsArgsForCall(0)
Expect(actualBindingID).To(Equal(bindingID))
Expect(actualInstanceID).To(Equal(instanceID))

By("validating the provider service is asked to delete the binding data")
Expect(fakeServiceProvider.DeleteBindingDataCallCount()).To(Equal(1))
_, actualInstanceID, actualBindingID = fakeServiceProvider.DeleteBindingDataArgsForCall(0)
Expect(actualBindingID).To(Equal(bindingID))
Expect(actualInstanceID).To(Equal(instanceID))
})
})
})

Describe("unsuccessful unbind", func() {
Expand Down
10 changes: 6 additions & 4 deletions integrationtest/binding_cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ var _ = Describe("Binding Cleanup", func() {
Expect(broker.DeleteBinding(instance, binding.GUID)).To(MatchError(ContainSubstring("unexpected status code 410")))
})

// This test captures an incorrect behavior that we want to fix
It("fails to clean up after a corrupted binding", func() {
It("successfully deletes a corrupted binding", func() {
By("provisioning successfully")
instance := must(broker.Provision(goodServiceOfferingGUID, goodServicePlanGUID))

Expand All @@ -65,7 +64,10 @@ var _ = Describe("Binding Cleanup", func() {
Error,
).To(Succeed())

By("failing to clean up the binding")
Expect(broker.DeleteBinding(instance, binding.GUID)).To(MatchError(ContainSubstring("failed to unbind: invalid workspace state unexpected end of JSON input")))
By("deleting the binding")
Expect(broker.DeleteBinding(instance, binding.GUID)).To(Succeed())

By("logging that the version could not be read")
Expect(broker.Stdout.String()).To(ContainSubstring("unbind-cannot-read-version"))
})
})
20 changes: 12 additions & 8 deletions integrationtest/provision_cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,19 @@ var _ = Describe("Provision Cleanup", func() {
})
})

// This test captures an incorrect behavior that we want to fix
It("fails to clean up after a provision failed with empty state", func() {
It("successfully deletes after failed provision with empty state", func() {
By("failing to provision")
instance, err := broker.Provision(serviceOfferingGUID, servicePlanGUID)
Expect(err).To(MatchError("provision failed with state: failed"))

By("failing to clean up")
Expect(broker.Deprovision(instance)).To(MatchError(ContainSubstring("failed to delete: workspace state not generated")))
By("deleting the service instance")
Expect(broker.Deprovision(instance)).To(Succeed())

By("logging that the version could not be read")
Expect(broker.Stdout.String()).To(ContainSubstring("deprovision-cannot-read-version"))
})

// This test captures an incorrect behavior that we want to fix
It("fails to clean up after a provision failed with corrupted state", func() {
It("successfully deletes after failed provision with corrupted state", func() {
By("failing to provision")
instance, err := broker.Provision(serviceOfferingGUID, servicePlanGUID)
Expect(err).To(MatchError("provision failed with state: failed"))
Expand All @@ -59,7 +60,10 @@ var _ = Describe("Provision Cleanup", func() {
Error,
).To(Succeed())

By("failing to clean up")
Expect(broker.Deprovision(instance)).To(MatchError(ContainSubstring("failed to delete: invalid workspace state unexpected end of JSON input")))
By("deleting the service instance")
Expect(broker.Deprovision(instance)).To(Succeed())

By("logging that the version could not be read")
Expect(broker.Stdout.String()).To(ContainSubstring("deprovision-cannot-read-version"))
})
})
5 changes: 4 additions & 1 deletion internal/testdrive/broker_deprovision.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ func (b *Broker) Deprovision(s ServiceInstance) error {
switch {
case deprovisionResponse.Error != nil:
return deprovisionResponse.Error
case deprovisionResponse.StatusCode != http.StatusAccepted:
case deprovisionResponse.StatusCode == http.StatusOK: // ok - synchronous
return nil
case deprovisionResponse.StatusCode == http.StatusAccepted: // ok - asynchronous - poll last operation
default:
return &UnexpectedStatusError{StatusCode: deprovisionResponse.StatusCode, ResponseBody: deprovisionResponse.ResponseBody}
}

Expand Down
9 changes: 9 additions & 0 deletions pkg/providers/tf/workspace/cannot_read_version_error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package workspace

type CannotReadVersionError struct {
message string
}

func (s CannotReadVersionError) Error() string {
return s.message
}
11 changes: 5 additions & 6 deletions pkg/providers/tf/workspace/workspace.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,15 @@ type TerraformWorkspace struct {
}

func (workspace *TerraformWorkspace) StateTFVersion() (*version.Version, error) {
var receiver struct {
Version string `json:"terraform_version"`
if !workspace.HasState() {
return nil, CannotReadVersionError{message: "workspace state not generated"}
}

if workspace.State == nil {
return nil, fmt.Errorf("workspace state not generated")
var receiver struct {
Version string `json:"terraform_version"`
}

if err := json.Unmarshal(workspace.State, &receiver); err != nil {
return nil, fmt.Errorf("invalid workspace state %w", err)
return nil, CannotReadVersionError{message: fmt.Sprintf("invalid workspace state %s", err)}
}

return version.NewVersion(receiver.Version)
Expand Down
Loading

0 comments on commit 5bb3ca7

Please sign in to comment.