From ffae2a4241337a758c1334e1529653400c3b2a82 Mon Sep 17 00:00:00 2001 From: staebler Date: Fri, 22 Jun 2018 16:30:39 -0400 Subject: [PATCH] Re-work the TestCreateServiceInstanceWithProvisionFailure integration test to mitigate its flake --- test/integration/controller_instance_test.go | 200 +++++++++++-------- test/util/util.go | 8 +- 2 files changed, 128 insertions(+), 80 deletions(-) diff --git a/test/integration/controller_instance_test.go b/test/integration/controller_instance_test.go index a170e1db5c9..168c0b31806 100644 --- a/test/integration/controller_instance_test.go +++ b/test/integration/controller_instance_test.go @@ -22,7 +22,9 @@ import ( "net/http" "net/url" "reflect" + "sync/atomic" "testing" + "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1005,72 +1007,64 @@ func (e TimeoutError) Error() string { // TestCreateServiceInstanceWithProvisionFailure tests creating a ServiceInstance // with various failure results in response to the provision request. -// TODO(carolynvs): I'm disabling this test because it's a flake that fails so much that I'm seeing 🔥 -func FlakeTestCreateServiceInstanceWithProvisionFailure(t *testing.T) { +func TestCreateServiceInstanceWithProvisionFailure(t *testing.T) { cases := []struct { name string statusCode int nonHTTPResponseError error - conditionReason string - expectFailCondition bool + provisionErrorReason string + failReason string triggersOrphanMitigation bool }{ { - name: "Status OK", - statusCode: http.StatusOK, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + name: "Status OK", + statusCode: http.StatusOK, + provisionErrorReason: "ProvisionCallFailed", }, { name: "Status Created", statusCode: http.StatusCreated, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + provisionErrorReason: "ProvisionCallFailed", triggersOrphanMitigation: true, }, { name: "other 2xx", statusCode: http.StatusNoContent, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + provisionErrorReason: "ProvisionCallFailed", triggersOrphanMitigation: true, }, { - name: "3XX", - statusCode: 300, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + name: "3XX", + statusCode: 300, + provisionErrorReason: "ProvisionCallFailed", }, { name: "Status Request Timeout", statusCode: http.StatusRequestTimeout, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + provisionErrorReason: "ProvisionCallFailed", triggersOrphanMitigation: false, }, { - name: "400", - statusCode: 400, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, + name: "400", + statusCode: 400, + provisionErrorReason: "ProvisionCallFailed", + failReason: "ClusterServiceBrokerReturnedFailure", }, { - name: "other 4XX", - statusCode: 403, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + name: "other 4XX", + statusCode: 403, + provisionErrorReason: "ProvisionCallFailed", }, { name: "5XX", statusCode: 500, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + provisionErrorReason: "ProvisionCallFailed", triggersOrphanMitigation: true, }, { name: "non-url transport error", nonHTTPResponseError: fmt.Errorf("non-url error"), - conditionReason: "ProvisionedSuccessfully", + provisionErrorReason: "ErrorCallingProvision", }, { name: "non-timeout url error", @@ -1079,7 +1073,7 @@ func FlakeTestCreateServiceInstanceWithProvisionFailure(t *testing.T) { URL: "https://fakebroker.com/v2/service_instances/instance_id", Err: fmt.Errorf("non-timeout error"), }, - conditionReason: "ProvisionedSuccessfully", + provisionErrorReason: "ErrorCallingProvision", }, { name: "network timeout", @@ -1088,14 +1082,18 @@ func FlakeTestCreateServiceInstanceWithProvisionFailure(t *testing.T) { URL: "https://fakebroker.com/v2/service_instances/instance_id", Err: TimeoutError("timeout error"), }, - conditionReason: "ProvisionedSuccessfully", - expectFailCondition: false, + provisionErrorReason: "ErrorCallingProvision", triggersOrphanMitigation: true, }, } for _, tc := range cases { tc := tc t.Run(tc.name, func(t *testing.T) { + + respondWithProvisionSuccess := int32(0) + respondWithDeprovisionSuccess := int32(0) + blockDeprovision := int32(1) + //t.Parallel() ct := &controllerTest{ t: t, @@ -1112,71 +1110,117 @@ func FlakeTestCreateServiceInstanceWithProvisionFailure(t *testing.T) { } } ct.osbClient.ProvisionReaction = fakeosb.DynamicProvisionReaction( - getProvisionResponseByPollCountReactions(2, []fakeosb.ProvisionReaction{ - fakeosb.ProvisionReaction{ - Error: reactionError, - }, - fakeosb.ProvisionReaction{ - Response: &osb.ProvisionResponse{}, - }, - })) + func(r *osb.ProvisionRequest) (*osb.ProvisionResponse, error) { + if atomic.LoadInt32(&respondWithProvisionSuccess) != 0 { + return &osb.ProvisionResponse{}, nil + } else { + return nil, reactionError + } + }) ct.osbClient.DeprovisionReaction = fakeosb.DynamicDeprovisionReaction( - getDeprovisionResponseByPollCountReactions(2, []fakeosb.DeprovisionReaction{ - fakeosb.DeprovisionReaction{ - Error: osb.HTTPStatusCodeError{ + func(r *osb.DeprovisionRequest) (*osb.DeprovisionResponse, error) { + if atomic.LoadInt32(&respondWithDeprovisionSuccess) != 0 { + return &osb.DeprovisionResponse{}, nil + } else { + for atomic.LoadInt32(&blockDeprovision) != 0 { + time.Sleep(100 * time.Millisecond) + } + return nil, osb.HTTPStatusCodeError{ StatusCode: 500, ErrorMessage: strPtr("temporary deprovision error"), - }, - }, - fakeosb.DeprovisionReaction{ - Response: &osb.DeprovisionResponse{}, - }, - })) + } + } + }) }, } ct.run(func(ct *controllerTest) { - var condition v1beta1.ServiceInstanceCondition - if tc.expectFailCondition { - // Instance should get stuck in a Failed condition - condition = v1beta1.ServiceInstanceCondition{ - Type: v1beta1.ServiceInstanceConditionFailed, - Status: v1beta1.ConditionTrue, - Reason: tc.conditionReason, - } - } else { - // Instance provisioning should be retried and succeed - condition = v1beta1.ServiceInstanceCondition{ - Type: v1beta1.ServiceInstanceConditionReady, - Status: v1beta1.ConditionTrue, - Reason: tc.conditionReason, - } + // Wait for the provision to fail + condition := v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionFalse, + Reason: tc.provisionErrorReason, + } + if tc.triggersOrphanMitigation { + condition.Reason = "StartingInstanceOrphanMitigation" } if err := util.WaitForInstanceCondition(ct.client, testNamespace, testInstanceName, condition); err != nil { - t.Fatalf("error waiting for instance condition: %v", err) + t.Fatalf("error waiting for provision to fail: %v", err) } - if tc.expectFailCondition { - if err := util.WaitForInstanceProcessedGeneration(ct.client, ct.instance.Namespace, ct.instance.Name, 1); err != nil { - t.Fatalf("error waiting for instance reconciliation to complete: %v", err) - } + // Assert that the latest generation has been observed + instance, err := ct.client.ServiceInstances(testNamespace).Get(testInstanceName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("error getting instance: %v", err) + } + if e, a := int64(1), instance.Status.ObservedGeneration; e != a { + t.Fatalf("unexpected observed generation: expected %v, got %v", e, a) } - brokerActions := ct.osbClient.Actions() - fmt.Printf("%#v", brokerActions) + // If the provision failed with a terminating failure + if tc.failReason != "" { + util.AssertServiceInstanceCondition(t, instance, v1beta1.ServiceInstanceConditionFailed, v1beta1.ConditionTrue, tc.failReason) + if e, a := 0, len(findBrokerActions(t, ct.osbClient, fakeosb.DeprovisionInstance)); e != a { + t.Fatalf("unexpected calls to deprovision instance: expected %v, got %v", e, a) + } + atomic.StoreInt32(&blockDeprovision, 0) + atomic.StoreInt32(&respondWithDeprovisionSuccess, 1) + return + } - // Ensure that we meet expectations on deprovision requests for orphan mitigation - deprovisionActions := findBrokerActions(t, ct.osbClient, fakeosb.DeprovisionInstance) + // Assert that the orphan mitigation reason was set correctly if tc.triggersOrphanMitigation { - if len(deprovisionActions) == 0 { - t.Fatal("expected orphan mitigation deprovision request to occur") + util.AssertServiceInstanceCondition(t, instance, v1beta1.ServiceInstanceConditionOrphanMitigation, v1beta1.ConditionTrue, tc.provisionErrorReason) + if !instance.Status.OrphanMitigationInProgress { + t.Fatalf("expected orphan mitigation to be in progress") } } else { - if len(deprovisionActions) != 0 { - t.Fatal("unexpected deprovision requests") + util.AssertServiceInstanceCondition(t, instance, v1beta1.ServiceInstanceConditionOrphanMitigation, v1beta1.ConditionFalse) + if instance.Status.OrphanMitigationInProgress { + t.Fatalf("expected orphan mitigation to not be in progress") + } + } + + // Now that the provision error conditions have been asserted, allow the broker to send its response to the deprovision request + atomic.StoreInt32(&blockDeprovision, 0) + + if tc.triggersOrphanMitigation { + // Assert that the ready condition is set to Unknown when the deprovision request fails + if err := util.WaitForInstanceCondition(ct.client, testNamespace, testInstanceName, v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionUnknown, + }); err != nil { + t.Fatalf("error waiting for instance deprovision to fail: %v", err) } } - // All instances should eventually succeed + // Now that everything surround the failed provision has been asserted, allow provision requests + // to succeed. Also, allow orphan mitigation to complete by allowing deprovision requests to succeed. + atomic.StoreInt32(&respondWithProvisionSuccess, 1) + atomic.StoreInt32(&respondWithDeprovisionSuccess, 1) + + // Wait for the instance to be provisioned successfully + if err := util.WaitForInstanceCondition(ct.client, testNamespace, testInstanceName, v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionTrue, + }); err != nil { + t.Fatalf("error waiting for instance to be provisioned: %v", err) + } + + // Assert that the observed generation is up-to-date, that orphan mitigation is not in progress, + // and that the instance is not in a failed state. + instance, err = ct.client.ServiceInstances(testNamespace).Get(testInstanceName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("error getting instance: %v", err) + } + if g, og := instance.Generation, instance.Status.ObservedGeneration; g != og { + t.Fatalf("latest generation not observed: generation: %v, observed: %v", g, og) + } + if instance.Status.OrphanMitigationInProgress { + t.Fatalf("unexpected orphan mitigation in progress") + } + util.AssertServiceInstanceCondition(t, instance, v1beta1.ServiceInstanceConditionFailed, v1beta1.ConditionFalse) + + // Assert that the last broker action was a provision-instance request. getLastBrokerAction(t, ct.osbClient, fakeosb.ProvisionInstance) }) }) diff --git a/test/util/util.go b/test/util/util.go index c3a81a1ef89..2d3740a469a 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -345,7 +345,9 @@ func AssertServiceInstanceCondition(t *testing.T, instance *v1beta1.ServiceInsta } if !foundCondition { - t.Fatalf("%v condition not found", conditionType) + if status != v1beta1.ConditionFalse || len(reason) != 0 { + t.Fatalf("%v condition not found", conditionType) + } } } @@ -366,6 +368,8 @@ func AssertServiceBindingCondition(t *testing.T, binding *v1beta1.ServiceBinding } if !foundCondition { - t.Fatalf("%v condition not found", conditionType) + if status != v1beta1.ConditionFalse || len(reason) != 0 { + t.Fatalf("%v condition not found", conditionType) + } } }