From 7996d8955db5fd6c5824ae3766441f8e15de1d54 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 26 Feb 2018 15:42:34 +1100 Subject: [PATCH 01/20] Make most HTTP errors temporary (retriable) --- pkg/controller/controller.go | 6 +++ pkg/controller/controller_instance.go | 70 ++++++++++++++++++--------- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 799078af54d..cb283e832d6 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -668,6 +668,12 @@ func shouldStartOrphanMitigation(statusCode int) bool { is5XX } +// isTerminalHttpStatus returns whether an error with the given HTTP status +// code is retriable. +func isRetriableHttpStatus(statusCode int) bool { + return statusCode != 400 +} + // ReconciliationAction reprents a type of action the reconciler should take // for a resource. type ReconciliationAction string diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index 070688cddbb..4635f16dbae 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -371,28 +371,32 @@ func (c *controller) reconcileServiceInstanceAdd(instance *v1beta1.ServiceInstan response, err := brokerClient.ProvisionInstance(request) if err != nil { - // A failure HTTP response code is treated as a terminal - // failure. Depending on the specific response, we may also - // need to initiate orphan mitigation. if httpErr, ok := osb.IsHTTPError(err); ok { msg := fmt.Sprintf( "Error provisioning ServiceInstance of %s at ClusterServiceBroker %q: %s", pretty.ClusterServiceClassName(serviceClass), brokerName, httpErr, ) readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, errorProvisionCallFailedReason, msg) - failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, "ClusterServiceBrokerReturnedFailure", msg) - return c.processProvisionFailure(instance, readyCond, failedCond, shouldStartOrphanMitigation(httpErr.StatusCode)) + // Depending on the specific response, we may need to initiate orphan mitigation. + shouldMitigateOrphan := shouldStartOrphanMitigation(httpErr.StatusCode) + if isRetriableHttpStatus(httpErr.StatusCode) { + return c.processTemporaryProvisionFailure(instance, readyCond, shouldMitigateOrphan) + } else { + // A failure with a given HTTP response code is treated as a terminal + // failure. + failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, "ClusterServiceBrokerReturnedFailure", msg) + return c.processTerminalProvisionFailure(instance, readyCond, failedCond, shouldMitigateOrphan) + } } reason := errorErrorCallingProvisionReason - // A timeout error is considered a terminal failure and we + // A timeout error is considered a retriable error, but we // should initiate orphan mitigation. if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() { - msg := fmt.Sprintf("Communication with the ClusterServiceBroker timed out; operation will not be retried: %v", urlErr) + msg := fmt.Sprintf("Communication with the ClusterServiceBroker timed out; operation will be retried: %v", urlErr) readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, reason, msg) - failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, reason, msg) - return c.processProvisionFailure(instance, readyCond, failedCond, true) + return c.processTemporaryProvisionFailure(instance, readyCond, true) } // All other errors should be retried, unless the @@ -403,7 +407,7 @@ func (c *controller) reconcileServiceInstanceAdd(instance *v1beta1.ServiceInstan if c.reconciliationRetryDurationExceeded(instance.Status.OperationStartTime) { msg := "Stopping reconciliation retries because too much time has elapsed" failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, errorReconciliationRetryTimeoutReason, msg) - return c.processProvisionFailure(instance, readyCond, failedCond, false) + return c.processTerminalProvisionFailure(instance, readyCond, failedCond, false) } return c.processServiceInstanceOperationError(instance, readyCond) @@ -761,8 +765,7 @@ func (c *controller) pollServiceInstance(instance *v1beta1.ServiceInstance) erro reason := errorProvisionCallFailedReason message := "Provision call failed: " + description readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, reason, message) - failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, reason, message) - err = c.processProvisionFailure(instance, readyCond, failedCond, false) + err = c.processTemporaryProvisionFailure(instance, readyCond, false) default: reason := errorUpdateInstanceCallFailedReason message := "Update call failed: " + description @@ -822,7 +825,7 @@ func (c *controller) processServiceInstancePollingFailureRetryTimeout(instance * case provisioning: // always finish polling instance, as triggering OM will return an error c.finishPollingServiceInstance(instance) - return c.processProvisionFailure(instance, readyCond, failedCond, true) + return c.processTerminalProvisionFailure(instance, readyCond, failedCond, true) default: readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, errorReconciliationRetryTimeoutReason, msg) err = c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) @@ -1577,24 +1580,39 @@ func (c *controller) processProvisionSuccess(instance *v1beta1.ServiceInstance, return nil } -// processProvisionFailure handles the logging and updating of a +// processTerminalProvisionFailure handles the logging and updating of a // ServiceInstance that hit a terminal failure during provision reconciliation. -func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition, shouldMitigateOrphan bool) error { +func (c *controller) processTerminalProvisionFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition, shouldMitigateOrphan bool) error { if failedCond == nil { return fmt.Errorf("failedCond must not be nil") } + return c.processProvisionFailure(instance, readyCond, failedCond, shouldMitigateOrphan) +} +// processTemporaryProvisionFailure handles the logging and updating of a +// ServiceInstance that hit a temporary error during provision reconciliation. +func (c *controller) processTemporaryProvisionFailure(instance *v1beta1.ServiceInstance, readyCond *v1beta1.ServiceInstanceCondition, shouldMitigateOrphan bool) error { + return c.processProvisionFailure(instance, readyCond, nil, shouldMitigateOrphan) +} + +// processProvisionFailure handles the logging and updating of a +// ServiceInstance that hit a temporary or a terminal failure during provision +// reconciliation. +func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition, shouldMitigateOrphan bool) error { if readyCond != nil { c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) } - c.recorder.Event(instance, corev1.EventTypeWarning, failedCond.Reason, failedCond.Message) - setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) + var errorMessage error + if failedCond != nil { + c.recorder.Event(instance, corev1.EventTypeWarning, failedCond.Reason, failedCond.Message) + setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) + errorMessage = fmt.Errorf(failedCond.Message) + } else { + errorMessage = fmt.Errorf(readyCond.Message) + } - // Need to vary return error depending on whether the worker should - // requeue this resource. - var err error if shouldMitigateOrphan { // Copy original failure reason/message to a new OrphanMitigation condition c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) @@ -1609,8 +1627,6 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, instance.Status.OperationStartTime = nil instance.Status.AsyncOpInProgress = false instance.Status.OrphanMitigationInProgress = true - - err = fmt.Errorf(failedCond.Message) } else { clearServiceInstanceCurrentOperation(instance) // Deprovisioning is not required for provisioning that has failed with an @@ -1623,7 +1639,12 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, return err } - return err + // Need to vary return error depending on whether the worker should + // requeue this resource. + if failedCond == nil || shouldMitigateOrphan { + return errorMessage + } + return nil } // processProvisionAsyncResponse handles the logging and updating @@ -1662,6 +1683,9 @@ func (c *controller) processUpdateServiceInstanceSuccess(instance *v1beta1.Servi // processUpdateServiceInstanceFailure handles the logging and updating of a // ServiceInstance that hit a terminal failure during update reconciliation. func (c *controller) processUpdateServiceInstanceFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition) error { + // TODO nilebox: We need to distinguish terminal and temporary errors there + // but we need to merge https://github.com/kubernetes-incubator/service-catalog/pull/1748 + // first that makes this method consistent with processProvisioningFailure() c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) From 4847388278202947f9e49f4ca4ec43a20b4c58d6 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 26 Feb 2018 15:43:43 +1100 Subject: [PATCH 02/20] 408 status code doesn't require orphan mitigation --- pkg/controller/controller.go | 1 - pkg/controller/controller_instance.go | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index cb283e832d6..dcf4ecf6443 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -664,7 +664,6 @@ func shouldStartOrphanMitigation(statusCode int) bool { is5XX := (statusCode >= 500 && statusCode < 600) return (is2XX && statusCode != http.StatusOK) || - statusCode == http.StatusRequestTimeout || is5XX } diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index 4635f16dbae..b4af0d66ccb 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -381,12 +381,11 @@ func (c *controller) reconcileServiceInstanceAdd(instance *v1beta1.ServiceInstan shouldMitigateOrphan := shouldStartOrphanMitigation(httpErr.StatusCode) if isRetriableHttpStatus(httpErr.StatusCode) { return c.processTemporaryProvisionFailure(instance, readyCond, shouldMitigateOrphan) - } else { - // A failure with a given HTTP response code is treated as a terminal - // failure. - failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, "ClusterServiceBrokerReturnedFailure", msg) - return c.processTerminalProvisionFailure(instance, readyCond, failedCond, shouldMitigateOrphan) } + // A failure with a given HTTP response code is treated as a terminal + // failure. + failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, "ClusterServiceBrokerReturnedFailure", msg) + return c.processTerminalProvisionFailure(instance, readyCond, failedCond, shouldMitigateOrphan) } reason := errorErrorCallingProvisionReason From cb98892e8231546740b6a9533c63cfa6ef9f34e3 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 26 Feb 2018 20:56:20 +1100 Subject: [PATCH 03/20] Fix lint error --- pkg/controller/controller.go | 2 +- pkg/controller/controller_instance.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index dcf4ecf6443..59157d3252b 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -669,7 +669,7 @@ func shouldStartOrphanMitigation(statusCode int) bool { // isTerminalHttpStatus returns whether an error with the given HTTP status // code is retriable. -func isRetriableHttpStatus(statusCode int) bool { +func isRetriableHTTPStatus(statusCode int) bool { return statusCode != 400 } diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index b4af0d66ccb..ccfbb7099b5 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -379,7 +379,7 @@ func (c *controller) reconcileServiceInstanceAdd(instance *v1beta1.ServiceInstan readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, errorProvisionCallFailedReason, msg) // Depending on the specific response, we may need to initiate orphan mitigation. shouldMitigateOrphan := shouldStartOrphanMitigation(httpErr.StatusCode) - if isRetriableHttpStatus(httpErr.StatusCode) { + if isRetriableHTTPStatus(httpErr.StatusCode) { return c.processTemporaryProvisionFailure(instance, readyCond, shouldMitigateOrphan) } // A failure with a given HTTP response code is treated as a terminal From a40fc51ffc8b666072b620dd1fbd9d0868ff2e8b Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Wed, 28 Feb 2018 13:23:31 +1100 Subject: [PATCH 04/20] Fix review comments --- pkg/controller/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 59157d3252b..48fe4646a96 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -667,7 +667,7 @@ func shouldStartOrphanMitigation(statusCode int) bool { is5XX } -// isTerminalHttpStatus returns whether an error with the given HTTP status +// isRetriableHTTPStatus returns whether an error with the given HTTP status // code is retriable. func isRetriableHTTPStatus(statusCode int) bool { return statusCode != 400 From 0aa8f2957f79bfdf5ed820e3b6104e37a94f463a Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 5 Mar 2018 16:03:21 +1100 Subject: [PATCH 05/20] Fix tests --- pkg/controller/controller_binding_test.go | 2 +- pkg/controller/controller_instance_test.go | 88 ++++++++++++++++++++-- pkg/controller/controller_test.go | 6 +- 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/pkg/controller/controller_binding_test.go b/pkg/controller/controller_binding_test.go index 5ab6a7a792c..a3172755d49 100644 --- a/pkg/controller/controller_binding_test.go +++ b/pkg/controller/controller_binding_test.go @@ -2375,7 +2375,7 @@ func TestReconcileBindingWithSetOrphanMitigation(t *testing.T) { bindReactionError: osb.HTTPStatusCodeError{ StatusCode: 408, }, - setOrphanMitigation: true, + setOrphanMitigation: false, shouldReturnError: false, }, { diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index cf63348c9e8..bffcac97669 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -919,16 +919,90 @@ func TestReconcileServiceInstanceWithProvisionCallFailure(t *testing.T) { } } -// TestReconcileServiceInstanceWithProvisionFailure tests that when the -// provision call to the broker fails with an HTTP error, the ready condition +// TestReconcileServiceInstanceWithTemporaryProvisionFailure tests that when the +// provision call to the broker fails with a retriable HTTP error, the ready condition +// becomes false, and the failure condition is not set. +func TestReconcileServiceInstanceWithTemporaryProvisionFailure(t *testing.T) { + fakeKubeClient, fakeCatalogClient, fakeClusterServiceBrokerClient, testController, sharedInformers := newTestController(t, fakeosb.FakeClientConfiguration{ + ProvisionReaction: &fakeosb.ProvisionReaction{ + Error: osb.HTTPStatusCodeError{ + StatusCode: http.StatusInternalServerError, + ErrorMessage: strPtr("InternalServerError"), + Description: strPtr("Something went wrong!"), + }, + }, + }) + + sharedInformers.ClusterServiceBrokers().Informer().GetStore().Add(getTestClusterServiceBroker()) + sharedInformers.ClusterServiceClasses().Informer().GetStore().Add(getTestClusterServiceClass()) + sharedInformers.ClusterServicePlans().Informer().GetStore().Add(getTestClusterServicePlan()) + + instance := getTestServiceInstanceWithRefs() + + if err := testController.reconcileServiceInstance(instance); err == nil { + t.Fatalf("Should not be able to make the ServiceInstance") + } + + brokerActions := fakeClusterServiceBrokerClient.Actions() + assertNumberOfClusterServiceBrokerActions(t, brokerActions, 1) + assertProvision(t, brokerActions[0], &osb.ProvisionRequest{ + AcceptsIncomplete: true, + InstanceID: testServiceInstanceGUID, + ServiceID: testClusterServiceClassGUID, + PlanID: testClusterServicePlanGUID, + Context: map[string]interface{}{ + "platform": "kubernetes", + "namespace": "test-ns", + }, + }) + + // verify no kube resources created + // One single action comes from getting namespace uid + kubeActions := fakeKubeClient.Actions() + if err := checkKubeClientActions(kubeActions, []kubeClientAction{ + {verb: "get", resourceName: "namespaces", checkType: checkGetActionType}, + }); err != nil { + t.Fatal(err) + } + + actions := fakeCatalogClient.Actions() + assertNumberOfActions(t, actions, 2) + + updatedServiceInstance := assertUpdateStatus(t, actions[0], instance) + assertServiceInstanceOperationInProgress(t, updatedServiceInstance, v1beta1.ServiceInstanceOperationProvision, testClusterServicePlanName, testClusterServicePlanGUID, instance) + + updatedServiceInstance = assertUpdateStatus(t, actions[1], instance) + assertServiceInstanceRequestFailingErrorStartOrphanMitigation( + t, + updatedServiceInstance, + v1beta1.ServiceInstanceOperationProvision, + startingInstanceOrphanMitigationReason, + "", + instance, + ) + + events := getRecordedEvents(testController) + + expectedEvent := warningEventBuilder(errorErrorCallingProvisionReason).msg( + "The provision call failed and will be retried:", + ).msgf( + "Error communicating with broker for provisioning:", + ).msg("fake creation failure") + if err := checkEvents(events, expectedEvent.stringArr()); err != nil { + t.Fatal(err) + } +} + +// TestReconcileServiceInstanceWithTerminalProvisionFailure tests that when the +// provision call to the broker fails with an 400 HTTP error, the ready condition // becomes false, and the failure condition is set. -func TestReconcileServiceInstanceWithProvisionFailure(t *testing.T) { +func TestReconcileServiceInstanceWithTerminalProvisionFailure(t *testing.T) { fakeKubeClient, fakeCatalogClient, fakeClusterServiceBrokerClient, testController, sharedInformers := newTestController(t, fakeosb.FakeClientConfiguration{ ProvisionReaction: &fakeosb.ProvisionReaction{ Error: osb.HTTPStatusCodeError{ - StatusCode: http.StatusConflict, - ErrorMessage: strPtr("OutOfQuota"), - Description: strPtr("You're out of quota!"), + StatusCode: http.StatusBadRequest, + ErrorMessage: strPtr("BadRequest"), + Description: strPtr("Your parameters are incorrect!"), }, }, }) @@ -989,7 +1063,7 @@ func TestReconcileServiceInstanceWithProvisionFailure(t *testing.T) { message := fmt.Sprintf( "Error provisioning ServiceInstance of ClusterServiceClass (K8S: %q ExternalName: %q) at ClusterServiceBroker %q: Status: %v; ErrorMessage: %s", - "SCGUID", "test-serviceclass", "test-broker", 409, "OutOfQuota; Description: You're out of quota!; ResponseError: ", + "SCGUID", "test-serviceclass", "test-broker", 400, "BadRequest; Description: Your parameters are incorrect!; ResponseError: ", ) expectedEvents := []string{ warningEventBuilder(errorProvisionCallFailedReason).msg(message).String(), diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 666e5eac969..f01c426ef6c 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -2243,7 +2243,11 @@ func assertServiceInstanceRequestFailingError(t *testing.T, obj runtime.Object, } assertServiceInstanceReadyCondition(t, obj, readyStatus, readyReason) - assertServiceInstanceCondition(t, obj, v1beta1.ServiceInstanceConditionFailed, v1beta1.ConditionTrue, failureReason) + if failureReason == "" { + assertServiceInstanceConditionMissing(t, obj, v1beta1.ServiceInstanceConditionFailed) + } else { + assertServiceInstanceCondition(t, obj, v1beta1.ServiceInstanceConditionFailed, v1beta1.ConditionTrue, failureReason) + } assertServiceInstanceOperationStartTimeSet(t, obj, false) assertAsyncOpInProgressFalse(t, obj) assertServiceInstanceExternalPropertiesUnchanged(t, obj, originalInstance) From 6a95318800e837de41d089979e4d23026cf13417 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 5 Mar 2018 16:41:34 +1100 Subject: [PATCH 06/20] Fix tests --- pkg/controller/controller_instance_test.go | 33 ++++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index bffcac97669..0b49c16f1ec 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -983,12 +983,18 @@ func TestReconcileServiceInstanceWithTemporaryProvisionFailure(t *testing.T) { events := getRecordedEvents(testController) - expectedEvent := warningEventBuilder(errorErrorCallingProvisionReason).msg( - "The provision call failed and will be retried:", - ).msgf( - "Error communicating with broker for provisioning:", - ).msg("fake creation failure") - if err := checkEvents(events, expectedEvent.stringArr()); err != nil { + message := fmt.Sprintf( + "Error provisioning ServiceInstance of ClusterServiceClass (K8S: %q ExternalName: %q) at ClusterServiceBroker %q: Status: %v; ErrorMessage: %s", + "SCGUID", "test-serviceclass", "test-broker", 500, "InternalServerError; Description: Something went wrong!; ResponseError: ", + ) + expectedProvisionCallEvent := warningEventBuilder(errorProvisionCallFailedReason).msg(message) + expectedOrphanMitigationEvent := warningEventBuilder(startingInstanceOrphanMitigationReason). + msg("The instance provision call failed with an ambiguous error; attempting to deprovision the instance in order to mitigate an orphaned resource") + expectedEvents := []string{ + expectedProvisionCallEvent.String(), + expectedOrphanMitigationEvent.String(), + } + if err := checkEvents(events, expectedEvents); err != nil { t.Fatal(err) } } @@ -3618,6 +3624,7 @@ func TestReconcileServiceInstanceWithHTTPStatusCodeErrorOrphanMitigation(t *test name string statusCode int triggersOrphanMitigation bool + terminalFailure bool }{ { name: "Status OK", @@ -3634,10 +3641,16 @@ func TestReconcileServiceInstanceWithHTTPStatusCodeErrorOrphanMitigation(t *test statusCode: 300, triggersOrphanMitigation: false, }, + { + name: "400", + statusCode: 400, + triggersOrphanMitigation: false, + terminalFailure: true, + }, { name: "408", statusCode: 408, - triggersOrphanMitigation: true, + triggersOrphanMitigation: false, }, { name: "other 4XX", @@ -3700,8 +3713,10 @@ func TestReconcileServiceInstanceWithHTTPStatusCodeErrorOrphanMitigation(t *test } } else { if err != nil { - t.Errorf("%v: Reconciler should treat as terminal condition and not requeue", tc.name) - continue + if tc.terminalFailure { + t.Errorf("%v: Reconciler should treat as terminal condition and not requeue", tc.name) + continue + } } } } From 126638af2c8aa2770a37383f3c431299cbbfc725 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 5 Mar 2018 16:43:51 +1100 Subject: [PATCH 07/20] Leave a TODO --- pkg/controller/controller_instance_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index 0b49c16f1ec..5d0a731cdfa 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -2310,6 +2310,7 @@ func TestPollServiceInstanceSuccessProvisioningWithOperation(t *testing.T) { t.Fatalf("pollServiceInstance failed: %s", err) } + // TODO nilebox: fix test: with new retry behavior we will requeue the failed provisioning/update if testController.instancePollingQueue.NumRequeues(instanceKey) != 0 { t.Fatalf("Expected polling queue to not have any record of test instance as polling should have completed") } From 4c4c3276eb101754196a73c475ef3994e7ac654e Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Mon, 5 Mar 2018 20:56:24 +1100 Subject: [PATCH 08/20] Fix test for polling with 'failed' state --- pkg/controller/controller_instance_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index 5d0a731cdfa..12c37e1faa4 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -2310,9 +2310,8 @@ func TestPollServiceInstanceSuccessProvisioningWithOperation(t *testing.T) { t.Fatalf("pollServiceInstance failed: %s", err) } - // TODO nilebox: fix test: with new retry behavior we will requeue the failed provisioning/update - if testController.instancePollingQueue.NumRequeues(instanceKey) != 0 { - t.Fatalf("Expected polling queue to not have any record of test instance as polling should have completed") + if testController.instancePollingQueue.NumRequeues(instanceKey) == 0 { + t.Fatalf("Expected polling queue to have requeues of test instance after polling have completed with a 'failed' state") } brokerActions := fakeClusterServiceBrokerClient.Actions() @@ -2393,7 +2392,7 @@ func TestPollServiceInstanceFailureProvisioningWithOperation(t *testing.T) { updatedServiceInstance, v1beta1.ServiceInstanceOperationProvision, errorProvisionCallFailedReason, - errorProvisionCallFailedReason, + "", instance, ) } From f421a1ef553a5ec7e37d07d0f1c3f659f117b324 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Tue, 6 Mar 2018 10:23:04 +1100 Subject: [PATCH 09/20] Fix failure with retries test --- pkg/controller/controller_instance.go | 22 ++- test/integration/controller_instance_test.go | 134 +++++++++---------- 2 files changed, 86 insertions(+), 70 deletions(-) diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index ccfbb7099b5..c3ed9403dd5 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -1632,6 +1632,10 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, // error that doesn't require orphan mitigation instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusNotRequired instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration + // TODO nilebox: If we update ReconciledGeneration on failure, API server rejects + // the status update request with error: + // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" + //instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration } if _, err := c.updateServiceInstanceStatus(instance); err != nil { @@ -1690,7 +1694,10 @@ func (c *controller) processUpdateServiceInstanceFailure(instance *v1beta1.Servi setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) clearServiceInstanceCurrentOperation(instance) - instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration + // TODO nilebox: If we update ReconciledGeneration on failure, API server rejects + // the status update request with error: + // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" + // instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration if _, err := c.updateServiceInstanceStatus(instance); err != nil { return err @@ -1734,7 +1741,13 @@ func (c *controller) processDeprovisionSuccess(instance *v1beta1.ServiceInstance instance.Status.ExternalProperties = nil instance.Status.ProvisionStatus = v1beta1.ServiceInstanceProvisionStatusNotProvisioned instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusSucceeded - instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration + // TODO nilebox: If we update ReconciledGeneration on orphan mitigation, + // and then retry provisioning, API server rejects + // the status update request with error: + // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" + if !mitigatingOrphan { + instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration + } if mitigatingOrphan { if _, err := c.updateServiceInstanceStatus(instance); err != nil { @@ -1778,7 +1791,10 @@ func (c *controller) processDeprovisionFailure(instance *v1beta1.ServiceInstance } clearServiceInstanceCurrentOperation(instance) - instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration + // TODO nilebox: If we update ReconciledGeneration on failure, API server rejects + // the status update request with error: + // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" + // instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusFailed if _, err := c.updateServiceInstanceStatus(instance); err != nil { diff --git a/test/integration/controller_instance_test.go b/test/integration/controller_instance_test.go index 845a5202039..9ef619f06d5 100644 --- a/test/integration/controller_instance_test.go +++ b/test/integration/controller_instance_test.go @@ -20,7 +20,7 @@ import ( "errors" "fmt" "net/http" - "net/url" + _ "net/url" "reflect" "testing" @@ -925,77 +925,77 @@ func TestCreateServiceInstanceWithProvisionFailure(t *testing.T) { expectFailCondition bool triggersOrphanMitigation bool }{ - { - name: "Status OK", - statusCode: http.StatusOK, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - }, + //{ + // name: "Status OK", + // statusCode: http.StatusOK, + // conditionReason: "ProvisionCallFailed", + // expectFailCondition: false, + //}, { name: "Status Created", statusCode: http.StatusCreated, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - triggersOrphanMitigation: true, - }, - { - name: "other 2xx", - statusCode: http.StatusNoContent, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - triggersOrphanMitigation: true, - }, - { - name: "3XX", - statusCode: 300, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - }, - { - name: "Status Request Timeout", - statusCode: http.StatusRequestTimeout, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - triggersOrphanMitigation: true, - }, - { - name: "other 4XX", - statusCode: 400, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - }, - { - name: "5XX", - statusCode: 500, - conditionReason: "ClusterServiceBrokerReturnedFailure", - expectFailCondition: true, - triggersOrphanMitigation: true, - }, - { - name: "non-url transport error", - nonHTTPResponseError: fmt.Errorf("non-url error"), - conditionReason: "ErrorCallingProvision", - }, - { - name: "non-timeout url error", - nonHTTPResponseError: &url.Error{ - Op: "Put", - URL: "https://fakebroker.com/v2/service_instances/instance_id", - Err: fmt.Errorf("non-timeout error"), - }, - conditionReason: "ErrorCallingProvision", - }, - { - name: "network timeout", - nonHTTPResponseError: &url.Error{ - Op: "Put", - URL: "https://fakebroker.com/v2/service_instances/instance_id", - Err: TimeoutError("timeout error"), - }, - conditionReason: "ErrorCallingProvision", - expectFailCondition: true, + conditionReason: "ProvisionCallFailed", + expectFailCondition: false, triggersOrphanMitigation: true, }, + //{ + // name: "other 2xx", + // statusCode: http.StatusNoContent, + // conditionReason: "ProvisionCallFailed", + // expectFailCondition: false, + // triggersOrphanMitigation: true, + //}, + //{ + // name: "3XX", + // statusCode: 300, + // conditionReason: "ClusterServiceBrokerReturnedFailure", + // expectFailCondition: false, + //}, + //{ + // name: "Status Request Timeout", + // statusCode: http.StatusRequestTimeout, + // conditionReason: "ProvisionCallFailed", + // expectFailCondition: false, + // triggersOrphanMitigation: true, + //}, + //{ + // name: "other 4XX", + // statusCode: 400, + // conditionReason: "ClusterServiceBrokerReturnedFailure", + // expectFailCondition: true, + //}, + //{ + // name: "5XX", + // statusCode: 500, + // conditionReason: "ProvisionCallFailed", + // expectFailCondition: false, + // triggersOrphanMitigation: true, + //}, + //{ + // name: "non-url transport error", + // nonHTTPResponseError: fmt.Errorf("non-url error"), + // conditionReason: "ErrorCallingProvision", + //}, + //{ + // name: "non-timeout url error", + // nonHTTPResponseError: &url.Error{ + // Op: "Put", + // URL: "https://fakebroker.com/v2/service_instances/instance_id", + // Err: fmt.Errorf("non-timeout error"), + // }, + // conditionReason: "ErrorCallingProvision", + //}, + //{ + // name: "network timeout", + // nonHTTPResponseError: &url.Error{ + // Op: "Put", + // URL: "https://fakebroker.com/v2/service_instances/instance_id", + // Err: TimeoutError("timeout error"), + // }, + // conditionReason: "ErrorCallingProvision", + // expectFailCondition: false, + // triggersOrphanMitigation: true, + //}, } for _, tc := range cases { tc := tc From bf2cc12d7cbafeafc1e36c9cc966eceb3ee7a5aa Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Tue, 6 Mar 2018 15:22:55 +1100 Subject: [PATCH 10/20] Fix tests for retries after provisioning fails --- pkg/controller/controller_test.go | 2 +- test/integration/controller_instance_test.go | 196 ++++++++++--------- test/integration/controller_test.go | 62 ++++++ 3 files changed, 171 insertions(+), 89 deletions(-) diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index f01c426ef6c..461e4a09ea9 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -2257,7 +2257,7 @@ func assertServiceInstanceRequestFailingError(t *testing.T, obj runtime.Object, func assertServiceInstanceProvisionRequestFailingErrorNoOrphanMitigation(t *testing.T, obj runtime.Object, operation v1beta1.ServiceInstanceOperation, readyReason string, failureReason string, originalInstance *v1beta1.ServiceInstance) { assertServiceInstanceRequestFailingError(t, obj, operation, readyReason, failureReason, false, originalInstance) assertServiceInstanceCurrentOperationClear(t, obj) - assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Generation) + assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Status.ReconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, originalInstance.Generation) assertServiceInstanceProvisioned(t, obj, originalInstance.Status.ProvisionStatus) assertServiceInstanceOrphanMitigationInProgressFalse(t, obj) diff --git a/test/integration/controller_instance_test.go b/test/integration/controller_instance_test.go index 9ef619f06d5..6947500d80d 100644 --- a/test/integration/controller_instance_test.go +++ b/test/integration/controller_instance_test.go @@ -20,7 +20,7 @@ import ( "errors" "fmt" "net/http" - _ "net/url" + "net/url" "reflect" "testing" @@ -925,77 +925,83 @@ func TestCreateServiceInstanceWithProvisionFailure(t *testing.T) { expectFailCondition bool triggersOrphanMitigation bool }{ - //{ - // name: "Status OK", - // statusCode: http.StatusOK, - // conditionReason: "ProvisionCallFailed", - // expectFailCondition: false, - //}, + { + name: "Status OK", + statusCode: http.StatusOK, + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + }, { name: "Status Created", statusCode: http.StatusCreated, - conditionReason: "ProvisionCallFailed", + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + triggersOrphanMitigation: true, + }, + { + name: "other 2xx", + statusCode: http.StatusNoContent, + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + triggersOrphanMitigation: true, + }, + { + name: "3XX", + statusCode: 300, + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + }, + { + name: "Status Request Timeout", + statusCode: http.StatusRequestTimeout, + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + triggersOrphanMitigation: false, + }, + { + name: "400", + statusCode: 400, + conditionReason: "ClusterServiceBrokerReturnedFailure", + expectFailCondition: true, + }, + { + name: "other 4XX", + statusCode: 403, + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + }, + { + name: "5XX", + statusCode: 500, + conditionReason: "ProvisionedSuccessfully", + expectFailCondition: false, + triggersOrphanMitigation: true, + }, + { + name: "non-url transport error", + nonHTTPResponseError: fmt.Errorf("non-url error"), + conditionReason: "ProvisionedSuccessfully", + }, + { + name: "non-timeout url error", + nonHTTPResponseError: &url.Error{ + Op: "Put", + URL: "https://fakebroker.com/v2/service_instances/instance_id", + Err: fmt.Errorf("non-timeout error"), + }, + conditionReason: "ProvisionedSuccessfully", + }, + { + name: "network timeout", + nonHTTPResponseError: &url.Error{ + Op: "Put", + URL: "https://fakebroker.com/v2/service_instances/instance_id", + Err: TimeoutError("timeout error"), + }, + conditionReason: "ProvisionedSuccessfully", expectFailCondition: false, triggersOrphanMitigation: true, }, - //{ - // name: "other 2xx", - // statusCode: http.StatusNoContent, - // conditionReason: "ProvisionCallFailed", - // expectFailCondition: false, - // triggersOrphanMitigation: true, - //}, - //{ - // name: "3XX", - // statusCode: 300, - // conditionReason: "ClusterServiceBrokerReturnedFailure", - // expectFailCondition: false, - //}, - //{ - // name: "Status Request Timeout", - // statusCode: http.StatusRequestTimeout, - // conditionReason: "ProvisionCallFailed", - // expectFailCondition: false, - // triggersOrphanMitigation: true, - //}, - //{ - // name: "other 4XX", - // statusCode: 400, - // conditionReason: "ClusterServiceBrokerReturnedFailure", - // expectFailCondition: true, - //}, - //{ - // name: "5XX", - // statusCode: 500, - // conditionReason: "ProvisionCallFailed", - // expectFailCondition: false, - // triggersOrphanMitigation: true, - //}, - //{ - // name: "non-url transport error", - // nonHTTPResponseError: fmt.Errorf("non-url error"), - // conditionReason: "ErrorCallingProvision", - //}, - //{ - // name: "non-timeout url error", - // nonHTTPResponseError: &url.Error{ - // Op: "Put", - // URL: "https://fakebroker.com/v2/service_instances/instance_id", - // Err: fmt.Errorf("non-timeout error"), - // }, - // conditionReason: "ErrorCallingProvision", - //}, - //{ - // name: "network timeout", - // nonHTTPResponseError: &url.Error{ - // Op: "Put", - // URL: "https://fakebroker.com/v2/service_instances/instance_id", - // Err: TimeoutError("timeout error"), - // }, - // conditionReason: "ErrorCallingProvision", - // expectFailCondition: false, - // triggersOrphanMitigation: true, - //}, } for _, tc := range cases { tc := tc @@ -1015,23 +1021,43 @@ func TestCreateServiceInstanceWithProvisionFailure(t *testing.T) { Description: strPtr("response description"), } } - ct.osbClient.ProvisionReaction = &fakeosb.ProvisionReaction{ - Error: reactionError, - } + ct.osbClient.ProvisionReaction = fakeosb.DynamicProvisionReaction( + getProvisionResponseByPollCountReactions(2, []fakeosb.ProvisionReaction{ + fakeosb.ProvisionReaction{ + Error: reactionError, + }, + fakeosb.ProvisionReaction{ + Response: &osb.ProvisionResponse{}, + }, + })) + ct.osbClient.DeprovisionReaction = fakeosb.DynamicDeprovisionReaction( + getDeprovisionResponseByPollCountReactions(2, []fakeosb.DeprovisionReaction{ + fakeosb.DeprovisionReaction{ + Error: osb.HTTPStatusCodeError{ + StatusCode: 500, + ErrorMessage: strPtr("temporary deprovision error"), + }, + }, + fakeosb.DeprovisionReaction{ + Response: &osb.DeprovisionResponse{}, + }, + })) }, } ct.run(func(ct *controllerTest) { var condition v1beta1.ServiceInstanceCondition if tc.expectFailCondition { + // Instance should get stuck in a Failed condition condition = v1beta1.ServiceInstanceCondition{ Type: v1beta1.ServiceInstanceConditionFailed, Status: v1beta1.ConditionTrue, Reason: tc.conditionReason, } } else { + // Instance provisioning should be retried and succeed condition = v1beta1.ServiceInstanceCondition{ Type: v1beta1.ServiceInstanceConditionReady, - Status: v1beta1.ConditionFalse, + Status: v1beta1.ConditionTrue, Reason: tc.conditionReason, } } @@ -1045,29 +1071,23 @@ func TestCreateServiceInstanceWithProvisionFailure(t *testing.T) { } } - expectedLastActionType := fakeosb.ProvisionInstance - if tc.triggersOrphanMitigation { - expectedLastActionType = fakeosb.DeprovisionInstance - } - // Ensure that the last request made to the broker was of the expected type - getLastBrokerAction(t, ct.osbClient, expectedLastActionType) + brokerActions := ct.osbClient.Actions() + fmt.Printf("%#v", brokerActions) + // Ensure that we meet expectations on deprovision requests for orphan mitigation + deprovisionActions := findBrokerActions(t, ct.osbClient, fakeosb.DeprovisionInstance) if tc.triggersOrphanMitigation { - instance, err := ct.client.ServiceInstances(ct.instance.Namespace).Get(ct.instance.Name, metav1.GetOptions{}) - if err != nil { - t.Fatalf("error getting instance %s/%s back", ct.instance.Namespace, ct.instance.Name) + if len(deprovisionActions) == 0 { + t.Fatal("expected orphan mitigation deprovision request to occur") } - util.AssertServiceInstanceCondition( - t, - instance, - v1beta1.ServiceInstanceConditionReady, - v1beta1.ConditionFalse, - "OrphanMitigationSuccessful", - ) - if e, a := v1beta1.ServiceInstanceDeprovisionStatusSucceeded, instance.Status.DeprovisionStatus; e != a { - t.Fatalf("unexpected deprovision status: expected %v, got %v", e, a) + } else { + if len(deprovisionActions) != 0 { + t.Fatal("unexpected deprovision requests") } } + + // All instances should eventually succeed + getLastBrokerAction(t, ct.osbClient, fakeosb.ProvisionInstance) }) }) } diff --git a/test/integration/controller_test.go b/test/integration/controller_test.go index b01f361b740..89174d7c7a7 100644 --- a/test/integration/controller_test.go +++ b/test/integration/controller_test.go @@ -607,6 +607,56 @@ func TestServiceBindingDeleteWithAsyncBindInProgress(t *testing.T) { } } +func getProvisionResponseByPollCountReactions(numOfResponses int, stateProgressions []fakeosb.ProvisionReaction) fakeosb.DynamicProvisionReaction { + numberOfPolls := 0 + numberOfStates := len(stateProgressions) + + return func(_ *osb.ProvisionRequest) (*osb.ProvisionResponse, error) { + var reaction fakeosb.ProvisionReaction + if numberOfPolls > (numOfResponses*numberOfStates)-1 { + reaction = stateProgressions[numberOfStates-1] + glog.V(5).Infof("Provision instance state progressions done, ended on %v", reaction) + } else { + idx := numberOfPolls / numOfResponses + reaction = stateProgressions[idx] + glog.V(5).Infof("Provision instance state progression on %v (polls:%v, idx:%v)", reaction, numberOfPolls, idx) + } + numberOfPolls++ + if reaction.Response != nil { + return &osb.ProvisionResponse{ + Async: reaction.Response.Async, + OperationKey: reaction.Response.OperationKey, + }, nil + } + return nil, reaction.Error + } +} + +func getDeprovisionResponseByPollCountReactions(numOfResponses int, stateProgressions []fakeosb.DeprovisionReaction) fakeosb.DynamicDeprovisionReaction { + numberOfPolls := 0 + numberOfStates := len(stateProgressions) + + return func(_ *osb.DeprovisionRequest) (*osb.DeprovisionResponse, error) { + var reaction fakeosb.DeprovisionReaction + if numberOfPolls > (numOfResponses*numberOfStates)-1 { + reaction = stateProgressions[numberOfStates-1] + glog.V(5).Infof("Deprovision instance state progressions done, ended on %v", reaction) + } else { + idx := numberOfPolls / numOfResponses + reaction = stateProgressions[idx] + glog.V(5).Infof("Deprovision instance state progression on %v (polls:%v, idx:%v)", reaction, numberOfPolls, idx) + } + numberOfPolls++ + if reaction.Response != nil { + return &osb.DeprovisionResponse{ + Async: reaction.Response.Async, + OperationKey: reaction.Response.OperationKey, + }, nil + } + return nil, reaction.Error + } +} + func getUpdateInstanceResponseByPollCountReactions(numOfResponses int, stateProgressions []fakeosb.UpdateInstanceReaction) fakeosb.DynamicUpdateInstanceReaction { numberOfPolls := 0 numberOfStates := len(stateProgressions) @@ -1225,6 +1275,18 @@ func getLastBrokerAction(t *testing.T, osbClient *fakeosb.FakeClient, actionType return brokerAction } +// findBrokerAction finds actions of the given type made to the fake broker client. +func findBrokerActions(t *testing.T, osbClient *fakeosb.FakeClient, actionType fakeosb.ActionType) []fakeosb.Action { + brokerActions := osbClient.Actions() + foundActions := make([]fakeosb.Action, 0, len(brokerActions)) + for _, action := range brokerActions { + if action.Type == actionType { + foundActions = append(foundActions, action) + } + } + return foundActions +} + // convertParametersIntoRawExtension converts the specified map of parameters // into a RawExtension object that can be used in the Parameters field of // ServiceInstanceSpec or ServiceBindingSpec. From 7bb4e73a2ea460a4bb56d4c493088947495092a1 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Wed, 21 Mar 2018 14:07:51 +1100 Subject: [PATCH 11/20] Remove TODO --- pkg/controller/controller_instance.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index c3ed9403dd5..115980defb8 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -1791,10 +1791,7 @@ func (c *controller) processDeprovisionFailure(instance *v1beta1.ServiceInstance } clearServiceInstanceCurrentOperation(instance) - // TODO nilebox: If we update ReconciledGeneration on failure, API server rejects - // the status update request with error: - // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" - // instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration + instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusFailed if _, err := c.updateServiceInstanceStatus(instance); err != nil { From 3e40460dac04aa4bcb4dbe513b9732386752dd4a Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Wed, 21 Mar 2018 14:57:30 +1100 Subject: [PATCH 12/20] Fix tests --- pkg/controller/controller_instance.go | 19 +------- pkg/controller/controller_instance_test.go | 55 ++++++++++++++++++---- pkg/controller/controller_test.go | 8 +++- 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index 115980defb8..f8c00ad13fb 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -1614,7 +1614,7 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, if shouldMitigateOrphan { // Copy original failure reason/message to a new OrphanMitigation condition - c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) + c.recorder.Event(instance, corev1.EventTypeWarning, startingInstanceOrphanMitigationReason, startingInstanceOrphanMitigationMessage) setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionOrphanMitigation, v1beta1.ConditionTrue, readyCond.Reason, readyCond.Message) // Overwrite Ready condition reason/message with reporting on orphan mitigation @@ -1631,11 +1631,6 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, // Deprovisioning is not required for provisioning that has failed with an // error that doesn't require orphan mitigation instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusNotRequired - instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration - // TODO nilebox: If we update ReconciledGeneration on failure, API server rejects - // the status update request with error: - // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" - //instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration } if _, err := c.updateServiceInstanceStatus(instance); err != nil { @@ -1694,10 +1689,6 @@ func (c *controller) processUpdateServiceInstanceFailure(instance *v1beta1.Servi setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) clearServiceInstanceCurrentOperation(instance) - // TODO nilebox: If we update ReconciledGeneration on failure, API server rejects - // the status update request with error: - // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" - // instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration if _, err := c.updateServiceInstanceStatus(instance); err != nil { return err @@ -1741,13 +1732,6 @@ func (c *controller) processDeprovisionSuccess(instance *v1beta1.ServiceInstance instance.Status.ExternalProperties = nil instance.Status.ProvisionStatus = v1beta1.ServiceInstanceProvisionStatusNotProvisioned instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusSucceeded - // TODO nilebox: If we update ReconciledGeneration on orphan mitigation, - // and then retry provisioning, API server rejects - // the status update request with error: - // "Forbidden: currentOperation must not be present when reconciledGeneration and generation are equal" - if !mitigatingOrphan { - instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration - } if mitigatingOrphan { if _, err := c.updateServiceInstanceStatus(instance); err != nil { @@ -1791,7 +1775,6 @@ func (c *controller) processDeprovisionFailure(instance *v1beta1.ServiceInstance } clearServiceInstanceCurrentOperation(instance) - instance.Status.ReconciledGeneration = instance.Status.ObservedGeneration instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusFailed if _, err := c.updateServiceInstanceStatus(instance); err != nil { diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index 12c37e1faa4..7f0d8f9a2af 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -939,11 +939,50 @@ func TestReconcileServiceInstanceWithTemporaryProvisionFailure(t *testing.T) { instance := getTestServiceInstanceWithRefs() + ////////////////////////////////////// + // Check 1st reconcilliation iteration (prepare/validate request & set status to in progress) + + if err := testController.reconcileServiceInstance(instance); err != nil { + t.Fatalf("Reconcile not expected to fail : %v", err) + } + + brokerActions := fakeClusterServiceBrokerClient.Actions() + assertNumberOfClusterServiceBrokerActions(t, brokerActions, 0) + + expectedKubeActions := []kubeClientAction{ + {verb: "get", resourceName: "namespaces", checkType: checkGetActionType}, + } + kubeActions := fakeKubeClient.Actions() + if err := checkKubeClientActions(kubeActions, expectedKubeActions); err != nil { + t.Fatal(err) + } + + actions := fakeCatalogClient.Actions() + assertNumberOfActions(t, actions, 1) + updatedServiceInstance := assertUpdateStatus(t, actions[0], instance) + + events := getRecordedEvents(testController) + updatedServiceInstance = assertUpdateStatus(t, actions[0], instance) + assertServiceInstanceOperationInProgress(t, + updatedServiceInstance, + v1beta1.ServiceInstanceOperationProvision, + testClusterServicePlanName, + testClusterServicePlanGUID, + instance, + ) + + ////////////////////////////////////// + // Check 2nd reconcilliation iteration (actual broker request) + + fakeCatalogClient.ClearActions() + fakeKubeClient.ClearActions() + instance = updatedServiceInstance.(*v1beta1.ServiceInstance) + if err := testController.reconcileServiceInstance(instance); err == nil { t.Fatalf("Should not be able to make the ServiceInstance") } - brokerActions := fakeClusterServiceBrokerClient.Actions() + brokerActions = fakeClusterServiceBrokerClient.Actions() assertNumberOfClusterServiceBrokerActions(t, brokerActions, 1) assertProvision(t, brokerActions[0], &osb.ProvisionRequest{ AcceptsIncomplete: true, @@ -958,30 +997,28 @@ func TestReconcileServiceInstanceWithTemporaryProvisionFailure(t *testing.T) { // verify no kube resources created // One single action comes from getting namespace uid - kubeActions := fakeKubeClient.Actions() + kubeActions = fakeKubeClient.Actions() if err := checkKubeClientActions(kubeActions, []kubeClientAction{ {verb: "get", resourceName: "namespaces", checkType: checkGetActionType}, }); err != nil { t.Fatal(err) } - actions := fakeCatalogClient.Actions() - assertNumberOfActions(t, actions, 2) - - updatedServiceInstance := assertUpdateStatus(t, actions[0], instance) - assertServiceInstanceOperationInProgress(t, updatedServiceInstance, v1beta1.ServiceInstanceOperationProvision, testClusterServicePlanName, testClusterServicePlanGUID, instance) + actions = fakeCatalogClient.Actions() + assertNumberOfActions(t, actions, 1) - updatedServiceInstance = assertUpdateStatus(t, actions[1], instance) + updatedServiceInstance = assertUpdateStatus(t, actions[0], instance) assertServiceInstanceRequestFailingErrorStartOrphanMitigation( t, updatedServiceInstance, v1beta1.ServiceInstanceOperationProvision, startingInstanceOrphanMitigationReason, "", + errorProvisionCallFailedReason, instance, ) - events := getRecordedEvents(testController) + events = getRecordedEvents(testController) message := fmt.Sprintf( "Error provisioning ServiceInstance of ClusterServiceClass (K8S: %q ExternalName: %q) at ClusterServiceBroker %q: Status: %v; ErrorMessage: %s", diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 461e4a09ea9..207d5a86932 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -2178,6 +2178,7 @@ func assertServiceInstanceOperationSuccessWithParameters(t *testing.T, obj runti ) var provisionStatus v1beta1.ServiceInstanceProvisionStatus var observedGeneration int64 + var reconciledGeneration int64 switch operation { case v1beta1.ServiceInstanceOperationProvision: provisionStatus = v1beta1.ServiceInstanceProvisionStatusProvisioned @@ -2185,12 +2186,14 @@ func assertServiceInstanceOperationSuccessWithParameters(t *testing.T, obj runti readyStatus = v1beta1.ConditionTrue deprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusRequired observedGeneration = originalInstance.Generation + reconciledGeneration = observedGeneration case v1beta1.ServiceInstanceOperationUpdate: provisionStatus = v1beta1.ServiceInstanceProvisionStatusProvisioned reason = successUpdateInstanceReason readyStatus = v1beta1.ConditionTrue deprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusRequired observedGeneration = originalInstance.Generation + reconciledGeneration = observedGeneration case v1beta1.ServiceInstanceOperationDeprovision: provisionStatus = v1beta1.ServiceInstanceProvisionStatusNotProvisioned reason = successDeprovisionReason @@ -2201,11 +2204,12 @@ func assertServiceInstanceOperationSuccessWithParameters(t *testing.T, obj runti } else { observedGeneration = originalInstance.Generation } + reconciledGeneration = originalInstance.Status.ReconciledGeneration } assertServiceInstanceReadyCondition(t, obj, readyStatus, reason) assertServiceInstanceCurrentOperationClear(t, obj) assertServiceInstanceOperationStartTimeSet(t, obj, false) - assertServiceInstanceReconciledGeneration(t, obj, observedGeneration) + assertServiceInstanceReconciledGeneration(t, obj, reconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, observedGeneration) assertServiceInstanceProvisioned(t, obj, provisionStatus) assertAsyncOpInProgressFalse(t, obj) @@ -2267,7 +2271,7 @@ func assertServiceInstanceProvisionRequestFailingErrorNoOrphanMitigation(t *test func assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t *testing.T, obj runtime.Object, operation v1beta1.ServiceInstanceOperation, readyReason string, failureReason string, originalInstance *v1beta1.ServiceInstance) { assertServiceInstanceRequestFailingError(t, obj, operation, readyReason, failureReason, true, originalInstance) assertServiceInstanceCurrentOperationClear(t, obj) - assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Generation) + assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Status.ReconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, originalInstance.Generation) assertServiceInstanceProvisioned(t, obj, originalInstance.Status.ProvisionStatus) assertServiceInstanceOrphanMitigationInProgressFalse(t, obj) From 0e02459dd244551bcc8108c8ebda97adf904896d Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Wed, 21 Mar 2018 17:26:20 +1100 Subject: [PATCH 13/20] Fix tests --- pkg/controller/controller_instance_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index 7f0d8f9a2af..871ce104bef 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -2347,8 +2347,8 @@ func TestPollServiceInstanceSuccessProvisioningWithOperation(t *testing.T) { t.Fatalf("pollServiceInstance failed: %s", err) } - if testController.instancePollingQueue.NumRequeues(instanceKey) == 0 { - t.Fatalf("Expected polling queue to have requeues of test instance after polling have completed with a 'failed' state") + if testController.instancePollingQueue.NumRequeues(instanceKey) != 0 { + t.Fatalf("Expected polling queue to not have requeues of test instance after polling have completed with a 'success' state") } brokerActions := fakeClusterServiceBrokerClient.Actions() @@ -2401,8 +2401,8 @@ func TestPollServiceInstanceFailureProvisioningWithOperation(t *testing.T) { t.Fatalf("pollServiceInstance failed: %s", err) } - if testController.instancePollingQueue.NumRequeues(instanceKey) != 0 { - t.Fatalf("Expected polling queue to not have any record of test instance as polling should have completed") + if testController.instancePollingQueue.NumRequeues(instanceKey) == 0 { + t.Fatalf("Expected polling queue to have a record of test instance as provisioning should have retried") } brokerActions := fakeClusterServiceBrokerClient.Actions() From 7c33d06632d061d572b9de6b63f51ff5aaf17e5a Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 00:24:44 +1100 Subject: [PATCH 14/20] Fix integration tests --- test/integration/controller_instance_test.go | 129 +++++++++++++------ 1 file changed, 89 insertions(+), 40 deletions(-) diff --git a/test/integration/controller_instance_test.go b/test/integration/controller_instance_test.go index 6947500d80d..d81a3c41a2a 100644 --- a/test/integration/controller_instance_test.go +++ b/test/integration/controller_instance_test.go @@ -1354,7 +1354,7 @@ func TestDeleteServiceInstance(t *testing.T) { } } -func TestPollServiceInstanceLastOperation(t *testing.T) { +func TestPollServiceInstanceLastOperationSuccess(t *testing.T) { cases := []struct { name string setup func(t *controllerTest) @@ -1403,28 +1403,6 @@ func TestPollServiceInstanceLastOperation(t *testing.T) { Reason: "ProvisionedSuccessfully", }, }, - { - name: "async provisioning with last operation response state failed", - setup: func(ct *controllerTest) { - ct.osbClient.ProvisionReaction = &fakeosb.ProvisionReaction{ - Response: &osb.ProvisionResponse{ - Async: true, - }, - } - ct.osbClient.PollLastOperationReaction = &fakeosb.PollLastOperationReaction{ - Response: &osb.LastOperationResponse{ - State: osb.StateFailed, - Description: strPtr("testDescription"), - }, - } - }, - skipVerifyingInstanceSuccess: true, - verifyCondition: &v1beta1.ServiceInstanceCondition{ - Type: v1beta1.ServiceInstanceConditionReady, - Status: v1beta1.ConditionFalse, - Reason: "ProvisionCallFailed", - }, - }, // response errors { name: "async provisioning with error on first poll", @@ -1485,23 +1463,6 @@ func TestPollServiceInstanceLastOperation(t *testing.T) { Reason: "ProvisionedSuccessfully", }, }, - { - name: "async provisioning with last operation response state failed eventually", - setup: func(ct *controllerTest) { - ct.osbClient.ProvisionReaction = &fakeosb.ProvisionReaction{ - Response: &osb.ProvisionResponse{ - Async: true, - }, - } - ct.osbClient.PollLastOperationReaction = fakeosb.DynamicPollLastOperationReaction(getLastOperationResponseByPollCountStates(2, []osb.LastOperationState{osb.StateInProgress, osb.StateFailed})) - }, - skipVerifyingInstanceSuccess: true, - verifyCondition: &v1beta1.ServiceInstanceCondition{ - Type: v1beta1.ServiceInstanceConditionReady, - Status: v1beta1.ConditionFalse, - Reason: "ProvisionCallFailed", - }, - }, { name: "async last operation response successful with originating identity", setup: func(ct *controllerTest) { @@ -1565,6 +1526,94 @@ func TestPollServiceInstanceLastOperation(t *testing.T) { } } +// TestPollServiceInstanceLastOperationFailure checks that async operation is correctly +// retried after the initial operation fails +func TestPollServiceInstanceLastOperationFailure(t *testing.T) { + cases := []struct { + name string + setup func(t *controllerTest) + skipVerifyingInstanceSuccess bool + failureCondition *v1beta1.ServiceInstanceCondition + successCondition *v1beta1.ServiceInstanceCondition + }{ + { + name: "async provisioning with last operation response state failed", + setup: func(ct *controllerTest) { + ct.osbClient.ProvisionReaction = &fakeosb.ProvisionReaction{ + Response: &osb.ProvisionResponse{ + Async: true, + }, + } + ct.osbClient.PollLastOperationReaction = fakeosb.DynamicPollLastOperationReaction( + getLastOperationResponseByPollCountStates(2, + []osb.LastOperationState{ + osb.StateFailed, + osb.StateSucceeded, + })) + }, + skipVerifyingInstanceSuccess: false, + failureCondition: &v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionFalse, + Reason: "ProvisionCallFailed", + }, + successCondition: &v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionTrue, + Reason: "ProvisionedSuccessfully", + }, + }, + // response errors + { + name: "async provisioning with last operation response state failed eventually", + setup: func(ct *controllerTest) { + ct.osbClient.ProvisionReaction = &fakeosb.ProvisionReaction{ + Response: &osb.ProvisionResponse{ + Async: true, + }, + } + ct.osbClient.PollLastOperationReaction = fakeosb.DynamicPollLastOperationReaction( + getLastOperationResponseByPollCountStates(1, + []osb.LastOperationState{ + osb.StateInProgress, + osb.StateFailed, + osb.StateInProgress, + osb.StateSucceeded, + })) + }, + skipVerifyingInstanceSuccess: false, + failureCondition: &v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionFalse, + Reason: "ProvisionCallFailed", + }, + successCondition: &v1beta1.ServiceInstanceCondition{ + Type: v1beta1.ServiceInstanceConditionReady, + Status: v1beta1.ConditionTrue, + Reason: "ProvisionedSuccessfully", + }, + }, + } + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + //t.Parallel() + ct := &controllerTest{ + t: t, + broker: getTestBroker(), + instance: getTestInstance(), + skipVerifyingInstanceSuccess: tc.skipVerifyingInstanceSuccess, + setup: tc.setup, + } + ct.run(func(ct *controllerTest) { + if err := util.WaitForInstanceCondition(ct.client, testNamespace, testInstanceName, *tc.successCondition); err != nil { + t.Fatalf("error waiting for instance condition: %v", err) + } + }) + }) + } +} + // TestRetryAsyncDeprovision tests whether asynchronous deprovisioning retries // by attempting a new deprovision after failing. func TestRetryAsyncDeprovision(t *testing.T) { From e4e5afc8f281eb4fc4d565f9a1e3303751138376 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 16:32:18 +1100 Subject: [PATCH 15/20] Increase Jenkins timeout from 30 to 50 minutes --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index bcd38c81dae..2cfd91f3376 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -65,7 +65,7 @@ def test_account = params.TEST_ACCOUNT def test_zone = params.TEST_ZONE ?: 'us-west1-b' def namespace = 'catalog' def root_path = 'src/github.com/kubernetes-incubator/service-catalog' -def timeoutMin = 30 +def timeoutMin = 50 def certFolder = '/tmp/sc-certs' node { From 77bbfbd3076f39858e6c48e97a92014b8e23022b Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 17:28:47 +1100 Subject: [PATCH 16/20] Cleanup --- pkg/controller/controller.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 48fe4646a96..aac41ca2d30 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -660,20 +660,19 @@ func (c *controller) reconciliationRetryDurationExceeded(operationStartTime *met // shouldStartOrphanMitigation returns whether an error with the given status // code indicates that orphan migitation should start. func shouldStartOrphanMitigation(statusCode int) bool { - is2XX := (statusCode >= 200 && statusCode < 300) - is5XX := (statusCode >= 500 && statusCode < 600) + is2XX := statusCode >= 200 && statusCode < 300 + is5XX := statusCode >= 500 && statusCode < 600 - return (is2XX && statusCode != http.StatusOK) || - is5XX + return (is2XX && statusCode != http.StatusOK) || is5XX } // isRetriableHTTPStatus returns whether an error with the given HTTP status // code is retriable. func isRetriableHTTPStatus(statusCode int) bool { - return statusCode != 400 + return statusCode != http.StatusBadRequest } -// ReconciliationAction reprents a type of action the reconciler should take +// ReconciliationAction represents a type of action the reconciler should take // for a resource. type ReconciliationAction string From fbd39b17a9a50a36e83178d9676c4c209fe48578 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 18:26:24 +1100 Subject: [PATCH 17/20] Implement support for temporary update errors + clarify comments about rate limiting --- pkg/controller/controller_instance.go | 45 ++++++++++++++++++++------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index f8c00ad13fb..63f18f05793 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -1598,10 +1598,8 @@ func (c *controller) processTemporaryProvisionFailure(instance *v1beta1.ServiceI // ServiceInstance that hit a temporary or a terminal failure during provision // reconciliation. func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition, shouldMitigateOrphan bool) error { - if readyCond != nil { - c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) - setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) - } + c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) + setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) var errorMessage error if failedCond != nil { @@ -1637,8 +1635,10 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, return err } - // Need to vary return error depending on whether the worker should - // requeue this resource. + // The instance will be requeued in any case, since we updated the status + // a few lines above. + // But we still need to return a non-nil error for retriable errors and + // orphan mitigation to avoid resetting the rate limiter. if failedCond == nil || shouldMitigateOrphan { return errorMessage } @@ -1678,22 +1678,43 @@ func (c *controller) processUpdateServiceInstanceSuccess(instance *v1beta1.Servi return nil } +// processTerminalUpdateServiceInstanceFailure handles the logging and updating of a +// ServiceInstance that hit a terminal failure during update reconciliation. +func (c *controller) processTerminalUpdateServiceInstanceFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition) error { + if failedCond == nil { + return fmt.Errorf("failedCond must not be nil") + } + return c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) +} + +// processTemporaryUpdateServiceInstanceFailure handles the logging and updating of a +// ServiceInstance that hit a temporary error during update reconciliation. +func (c *controller) processTemporaryUpdateServiceInstanceFailure(instance *v1beta1.ServiceInstance, readyCond *v1beta1.ServiceInstanceCondition) error { + return c.processUpdateServiceInstanceFailure(instance, readyCond, nil) +} + // processUpdateServiceInstanceFailure handles the logging and updating of a // ServiceInstance that hit a terminal failure during update reconciliation. func (c *controller) processUpdateServiceInstanceFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition) error { - // TODO nilebox: We need to distinguish terminal and temporary errors there - // but we need to merge https://github.com/kubernetes-incubator/service-catalog/pull/1748 - // first that makes this method consistent with processProvisioningFailure() c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message) - setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message) - setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) - clearServiceInstanceCurrentOperation(instance) + + if failedCond != nil { + setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) + clearServiceInstanceCurrentOperation(instance) + } if _, err := c.updateServiceInstanceStatus(instance); err != nil { return err } + // The instance will be requeued in any case, since we updated the status + // a few lines above. + // But we still need to return a non-nil error for retriable errors + // to avoid resetting the rate limiter. + if failedCond == nil { + return fmt.Errorf(readyCond.Message) + } return nil } From 89cdb3342937bc6305aa091006b10ac8ca32b149 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 18:45:57 +1100 Subject: [PATCH 18/20] Don't set the Failed condition for temporary update errors --- pkg/controller/controller_instance.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index 63f18f05793..8f4b52454a4 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -484,17 +484,22 @@ func (c *controller) reconcileServiceInstanceUpdate(instance *v1beta1.ServiceIns if httpErr, ok := osb.IsHTTPError(err); ok { msg := fmt.Sprintf("ClusterServiceBroker returned a failure for update call; update will not be retried: %v", httpErr) readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, errorUpdateInstanceCallFailedReason, msg) + + if isRetriableHTTPStatus(httpErr.StatusCode) { + return c.processTemporaryUpdateServiceInstanceFailure(instance, readyCond) + } + // A failure with a given HTTP response code is treated as a terminal + // failure. failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, errorUpdateInstanceCallFailedReason, msg) - return c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) + return c.processTerminalUpdateServiceInstanceFailure(instance, readyCond, failedCond) } reason := errorErrorCallingUpdateInstanceReason if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() { - msg := fmt.Sprintf("Communication with the ClusterServiceBroker timed out; update will not be retried: %v", urlErr) + msg := fmt.Sprintf("Communication with the ClusterServiceBroker timed out; update will be retried: %v", urlErr) readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, reason, msg) - failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, reason, msg) - return c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) + return c.processTemporaryUpdateServiceInstanceFailure(instance, readyCond) } msg := fmt.Sprintf("The update call failed and will be retried: Error communicating with broker for updating: %s", err) @@ -508,7 +513,7 @@ func (c *controller) reconcileServiceInstanceUpdate(instance *v1beta1.ServiceIns msg = "Stopping reconciliation retries because too much time has elapsed" readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, errorReconciliationRetryTimeoutReason, msg) failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, errorReconciliationRetryTimeoutReason, msg) - return c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) + return c.processTerminalUpdateServiceInstanceFailure(instance, readyCond, failedCond) } readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, reason, msg) @@ -769,8 +774,7 @@ func (c *controller) pollServiceInstance(instance *v1beta1.ServiceInstance) erro reason := errorUpdateInstanceCallFailedReason message := "Update call failed: " + description readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, reason, message) - failedCond := newServiceInstanceFailedCondition(v1beta1.ConditionTrue, reason, message) - err = c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) + err = c.processTemporaryUpdateServiceInstanceFailure(instance, readyCond) } if err != nil { return c.handleServiceInstancePollingError(instance, err) @@ -827,7 +831,7 @@ func (c *controller) processServiceInstancePollingFailureRetryTimeout(instance * return c.processTerminalProvisionFailure(instance, readyCond, failedCond, true) default: readyCond := newServiceInstanceReadyCondition(v1beta1.ConditionFalse, errorReconciliationRetryTimeoutReason, msg) - err = c.processUpdateServiceInstanceFailure(instance, readyCond, failedCond) + err = c.processTerminalUpdateServiceInstanceFailure(instance, readyCond, failedCond) } if err != nil { return c.handleServiceInstancePollingError(instance, err) From 67f82e62d4cb12a94286185542cac261b831925a Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 22:00:36 +1100 Subject: [PATCH 19/20] Fix tests --- pkg/controller/controller_instance_test.go | 6 +++--- pkg/controller/controller_test.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index 871ce104bef..c559d5249db 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -5221,8 +5221,8 @@ func TestPollServiceInstanceAsyncFailureUpdating(t *testing.T) { t.Fatalf("pollServiceInstance failed: %s", err) } - if testController.instancePollingQueue.NumRequeues(instanceKey) != 0 { - t.Fatalf("Expected polling queue to not have any record of test instance as polling should have completed") + if testController.instancePollingQueue.NumRequeues(instanceKey) == 0 { + t.Fatalf("Expected polling queue to have a record of test instance as update should have retried") } brokerActions := fakeClusterServiceBrokerClient.Actions() @@ -5244,7 +5244,7 @@ func TestPollServiceInstanceAsyncFailureUpdating(t *testing.T) { assertNumberOfActions(t, actions, 1) updatedServiceInstance := assertUpdateStatus(t, actions[0], instance) - assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t, updatedServiceInstance, v1beta1.ServiceInstanceOperationUpdate, errorUpdateInstanceCallFailedReason, errorUpdateInstanceCallFailedReason, instance) + assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t, updatedServiceInstance, v1beta1.ServiceInstanceOperationUpdate, errorUpdateInstanceCallFailedReason, "", instance) } func TestCheckClassAndPlanForDeletion(t *testing.T) { diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 207d5a86932..89988e12f5e 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -2269,7 +2269,7 @@ func assertServiceInstanceProvisionRequestFailingErrorNoOrphanMitigation(t *test } func assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t *testing.T, obj runtime.Object, operation v1beta1.ServiceInstanceOperation, readyReason string, failureReason string, originalInstance *v1beta1.ServiceInstance) { - assertServiceInstanceRequestFailingError(t, obj, operation, readyReason, failureReason, true, originalInstance) + assertServiceInstanceRequestFailingError(t, obj, operation, readyReason, failureReason, false, originalInstance) assertServiceInstanceCurrentOperationClear(t, obj) assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Status.ReconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, originalInstance.Generation) From 5aa01392320e2be8b684bb52f298e982a3de4336 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Thu, 22 Mar 2018 22:42:15 +1100 Subject: [PATCH 20/20] Don't reset the current operation + fix unit tests --- pkg/controller/controller_instance.go | 19 +++++++++-- pkg/controller/controller_instance_test.go | 6 ++-- pkg/controller/controller_test.go | 39 ++++++++++++++++++---- 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/pkg/controller/controller_instance.go b/pkg/controller/controller_instance.go index 8f4b52454a4..7f7f95d9896 100644 --- a/pkg/controller/controller_instance.go +++ b/pkg/controller/controller_instance.go @@ -1625,16 +1625,23 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, startingInstanceOrphanMitigationReason, startingInstanceOrphanMitigationMessage) - instance.Status.OperationStartTime = nil - instance.Status.AsyncOpInProgress = false instance.Status.OrphanMitigationInProgress = true } else { - clearServiceInstanceCurrentOperation(instance) // Deprovisioning is not required for provisioning that has failed with an // error that doesn't require orphan mitigation instance.Status.DeprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusNotRequired } + if failedCond == nil || shouldMitigateOrphan { + // Don't reset the current operation if the error is retriable + // or requires an orphan mitigation. + // Only reset the OSB operation status + clearServiceInstanceAsyncOsbOperation(instance) + } else { + // Reset the current operation if there was a terminal error + clearServiceInstanceCurrentOperation(instance) + } + if _, err := c.updateServiceInstanceStatus(instance); err != nil { return err } @@ -1705,7 +1712,13 @@ func (c *controller) processUpdateServiceInstanceFailure(instance *v1beta1.Servi if failedCond != nil { setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionFailed, failedCond.Status, failedCond.Reason, failedCond.Message) + // Reset the current operation if there was a terminal error clearServiceInstanceCurrentOperation(instance) + } else { + // Don't reset the current operation if the error is retriable + // or requires an orphan mitigation. + // Only reset the OSB operation status + clearServiceInstanceAsyncOsbOperation(instance) } if _, err := c.updateServiceInstanceStatus(instance); err != nil { diff --git a/pkg/controller/controller_instance_test.go b/pkg/controller/controller_instance_test.go index c559d5249db..805f47e7d04 100644 --- a/pkg/controller/controller_instance_test.go +++ b/pkg/controller/controller_instance_test.go @@ -4773,8 +4773,8 @@ func TestReconcileServiceInstanceWithUpdateFailure(t *testing.T) { fakeCatalogClient.ClearActions() fakeKubeClient.ClearActions() - if err := testController.reconcileServiceInstance(instance); err != nil { - t.Fatalf("unexpected error: %v", err) + if err := testController.reconcileServiceInstance(instance); err == nil { + t.Fatal("expected error to be returned") } brokerActions := fakeClusterServiceBrokerClient.Actions() @@ -4803,7 +4803,7 @@ func TestReconcileServiceInstanceWithUpdateFailure(t *testing.T) { assertNumberOfActions(t, actions, 1) updatedServiceInstance := assertUpdateStatus(t, actions[0], instance) - assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t, updatedServiceInstance, v1beta1.ServiceInstanceOperationUpdate, errorUpdateInstanceCallFailedReason, errorUpdateInstanceCallFailedReason, instance) + assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t, updatedServiceInstance, v1beta1.ServiceInstanceOperationUpdate, errorUpdateInstanceCallFailedReason, "", instance) events := getRecordedEvents(testController) diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 89988e12f5e..c5a8e499a87 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -2157,7 +2157,7 @@ func assertServiceInstanceOperationInProgressWithParameters(t *testing.T, obj ru func assertServiceInstanceStartingOrphanMitigation(t *testing.T, obj runtime.Object, originalInstance *v1beta1.ServiceInstance) { assertServiceInstanceCurrentOperation(t, obj, v1beta1.ServiceInstanceOperationProvision) assertServiceInstanceReadyFalse(t, obj, startingInstanceOrphanMitigationReason) - assertServiceInstanceOperationStartTimeSet(t, obj, false) + assertServiceInstanceOperationStartTimeSet(t, obj, true) assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Status.ReconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, originalInstance.Generation) assertServiceInstanceProvisioned(t, obj, originalInstance.Status.ProvisionStatus) @@ -2234,25 +2234,35 @@ func assertServiceInstanceRequestFailingError(t *testing.T, obj runtime.Object, deprovisionStatus v1beta1.ServiceInstanceDeprovisionStatus ) switch operation { - case v1beta1.ServiceInstanceOperationProvision, v1beta1.ServiceInstanceOperationUpdate: + case v1beta1.ServiceInstanceOperationProvision: readyStatus = v1beta1.ConditionFalse if orphanMitigationRequired { deprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusRequired } else { deprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusNotRequired } + case v1beta1.ServiceInstanceOperationUpdate: + readyStatus = v1beta1.ConditionFalse + deprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusRequired case v1beta1.ServiceInstanceOperationDeprovision: readyStatus = v1beta1.ConditionUnknown deprovisionStatus = v1beta1.ServiceInstanceDeprovisionStatusFailed } assertServiceInstanceReadyCondition(t, obj, readyStatus, readyReason) + if failureReason == "" { assertServiceInstanceConditionMissing(t, obj, v1beta1.ServiceInstanceConditionFailed) } else { assertServiceInstanceCondition(t, obj, v1beta1.ServiceInstanceConditionFailed, v1beta1.ConditionTrue, failureReason) } - assertServiceInstanceOperationStartTimeSet(t, obj, false) + + if failureReason == "" || orphanMitigationRequired { + assertServiceInstanceOperationStartTimeSet(t, obj, true) + } else { + assertServiceInstanceOperationStartTimeSet(t, obj, false) + } + assertAsyncOpInProgressFalse(t, obj) assertServiceInstanceExternalPropertiesUnchanged(t, obj, originalInstance) assertServiceInstanceDeprovisionStatus(t, obj, deprovisionStatus) @@ -2260,22 +2270,37 @@ func assertServiceInstanceRequestFailingError(t *testing.T, obj runtime.Object, func assertServiceInstanceProvisionRequestFailingErrorNoOrphanMitigation(t *testing.T, obj runtime.Object, operation v1beta1.ServiceInstanceOperation, readyReason string, failureReason string, originalInstance *v1beta1.ServiceInstance) { assertServiceInstanceRequestFailingError(t, obj, operation, readyReason, failureReason, false, originalInstance) - assertServiceInstanceCurrentOperationClear(t, obj) + + if failureReason == "" { + assertServiceInstanceCurrentOperation(t, obj, operation) + assertServiceInstanceInProgressPropertiesUnchanged(t, obj, originalInstance) + } else { + assertServiceInstanceCurrentOperationClear(t, obj) + assertServiceInstanceInProgressPropertiesNil(t, obj) + } + assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Status.ReconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, originalInstance.Generation) assertServiceInstanceProvisioned(t, obj, originalInstance.Status.ProvisionStatus) assertServiceInstanceOrphanMitigationInProgressFalse(t, obj) - assertServiceInstanceInProgressPropertiesNil(t, obj) + } func assertServiceInstanceUpdateRequestFailingErrorNoOrphanMitigation(t *testing.T, obj runtime.Object, operation v1beta1.ServiceInstanceOperation, readyReason string, failureReason string, originalInstance *v1beta1.ServiceInstance) { assertServiceInstanceRequestFailingError(t, obj, operation, readyReason, failureReason, false, originalInstance) - assertServiceInstanceCurrentOperationClear(t, obj) + + if failureReason == "" { + assertServiceInstanceCurrentOperation(t, obj, operation) + assertServiceInstanceInProgressPropertiesUnchanged(t, obj, originalInstance) + } else { + assertServiceInstanceCurrentOperationClear(t, obj) + assertServiceInstanceInProgressPropertiesNil(t, obj) + } + assertServiceInstanceReconciledGeneration(t, obj, originalInstance.Status.ReconciledGeneration) assertServiceInstanceObservedGeneration(t, obj, originalInstance.Generation) assertServiceInstanceProvisioned(t, obj, originalInstance.Status.ProvisionStatus) assertServiceInstanceOrphanMitigationInProgressFalse(t, obj) - assertServiceInstanceInProgressPropertiesNil(t, obj) } func assertServiceInstanceRequestFailingErrorStartOrphanMitigation(t *testing.T, obj runtime.Object, operation v1beta1.ServiceInstanceOperation, readyReason string, failureReason string, orphanMitigationReason string, originalInstance *v1beta1.ServiceInstance) {