From a83ac33825dd73d04ca201737ec012fe820e801b Mon Sep 17 00:00:00 2001 From: Evan Cordell Date: Tue, 7 May 2019 11:11:16 -0400 Subject: [PATCH] fix(catalog): fix issue where subscriptions sometimes get "stuck" we were not resetting the client when updating a catalogsource, which meant it was possible for the client to be stale and never attempt a reconnect if it didn't go unhealthy "in time" for us to detect and reconnect. --- pkg/controller/operators/catalog/operator.go | 13 ++++++++++--- pkg/controller/operators/olm/operator.go | 3 --- pkg/controller/operators/olm/operatorgroup.go | 4 ++++ test/e2e/e2e-values.yaml | 4 +--- test/e2e/installplan_e2e_test.go | 19 +++++++++++++------ 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index 44ac5ce0b8..7f12205ea6 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -446,6 +446,12 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) { o.sourcesLastUpdate = timeNow() logger.Debug("registry server recreated") + func() { + o.sourcesLock.Lock() + defer o.sourcesLock.Unlock() + delete(o.sources, sourceKey) + }() + return nil } logger.Debug("registry state good") @@ -717,7 +723,7 @@ func (o *Operator) nothingToUpdate(logger *logrus.Entry, sub *v1alpha1.Subscript logger.Debugf("skipping update: no new updates to catalog since last sync at %s", sub.Status.LastUpdated.String()) return true } - if sub.Status.Install != nil && sub.Status.State == v1alpha1.SubscriptionStateUpgradePending { + if sub.Status.InstallPlanRef != nil && sub.Status.State == v1alpha1.SubscriptionStateUpgradePending { logger.Debugf("skipping update: installplan already created") return true } @@ -797,13 +803,14 @@ func (o *Operator) ensureSubscriptionCSVState(logger *logrus.Entry, sub *v1alpha out.Status.LastUpdated = timeNow() // Update Subscription with status of transition. Log errors if we can't write them to the status. - if sub, err = o.client.OperatorsV1alpha1().Subscriptions(out.GetNamespace()).UpdateStatus(out); err != nil { + updatedSub, err := o.client.OperatorsV1alpha1().Subscriptions(out.GetNamespace()).UpdateStatus(out) + if err != nil { logger.WithError(err).Info("error updating subscription status") return nil, false, fmt.Errorf("error updating Subscription status: " + err.Error()) } // subscription status represents cluster state - return sub, true, nil + return updatedSub, true, nil } func (o *Operator) updateSubscriptionStatus(namespace string, subs []*v1alpha1.Subscription, installPlanRef *corev1.ObjectReference) error { diff --git a/pkg/controller/operators/olm/operator.go b/pkg/controller/operators/olm/operator.go index 11c6b87fdb..c6cbbe94e1 100644 --- a/pkg/controller/operators/olm/operator.go +++ b/pkg/controller/operators/olm/operator.go @@ -1099,9 +1099,6 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v syncError = fmt.Errorf("CSV marked as replacement, but no replacmenet CSV found in cluster.") } case v1alpha1.CSVPhaseDeleting: - if err := a.csvQueueSet.Remove(out.GetName(), out.GetNamespace()); err != nil { - logger.WithError(err).Debug("error removing from queue") - } syncError = a.client.OperatorsV1alpha1().ClusterServiceVersions(out.GetNamespace()).Delete(out.GetName(), metav1.NewDeleteOptions(0)) if syncError != nil { logger.Debugf("unable to get delete csv marked for deletion: %s", syncError.Error()) diff --git a/pkg/controller/operators/olm/operatorgroup.go b/pkg/controller/operators/olm/operatorgroup.go index fd9e1f26ad..c779886830 100644 --- a/pkg/controller/operators/olm/operatorgroup.go +++ b/pkg/controller/operators/olm/operatorgroup.go @@ -544,6 +544,10 @@ func (a *Operator) ensureCSVsInNamespaces(csv *v1alpha1.ClusterServiceVersion, o } func (a *Operator) copyToNamespace(csv *v1alpha1.ClusterServiceVersion, namespace string) error { + if csv.GetNamespace() == namespace { + return nil + } + logger := a.Log.WithField("operator-ns", csv.GetNamespace()).WithField("target-ns", namespace) newCSV := csv.DeepCopy() delete(newCSV.Annotations, v1.OperatorGroupTargetsAnnotationKey) diff --git a/test/e2e/e2e-values.yaml b/test/e2e/e2e-values.yaml index cdae884280..e8b4187b7d 100644 --- a/test/e2e/e2e-values.yaml +++ b/test/e2e/e2e-values.yaml @@ -1,4 +1,5 @@ writeStatusName: '""' +debug: true olm: replicaCount: 1 @@ -7,7 +8,6 @@ olm: pullPolicy: IfNotPresent service: internalPort: 8080 - commandArgs: -debug catalog: replicaCount: 1 @@ -16,7 +16,6 @@ catalog: pullPolicy: IfNotPresent service: internalPort: 8080 - commandArgs: -debug package: replicaCount: 1 @@ -25,7 +24,6 @@ package: pullPolicy: IfNotPresent service: internalPort: 5443 - commandArgs: --debug e2e: image: diff --git a/test/e2e/installplan_e2e_test.go b/test/e2e/installplan_e2e_test.go index 5ed969cb58..b87e81a533 100644 --- a/test/e2e/installplan_e2e_test.go +++ b/test/e2e/installplan_e2e_test.go @@ -290,7 +290,7 @@ func TestInstallPlanWithCSVsAcrossMultipleCatalogSources(t *testing.T) { require.NoError(t, err) require.NotNil(t, subscription) - installPlanName := subscription.Status.Install.Name + installPlanName := subscription.Status.InstallPlanRef.Name // Wait for InstallPlan to be status: Complete before checking resource presence fetchedInstallPlan, err := fetchInstallPlan(t, crc, installPlanName, buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseComplete)) @@ -339,11 +339,12 @@ EXPECTED: require.NotNil(t, dependentSubscription.Status.InstallPlanRef) require.Equal(t, dependentCSV.GetName(), dependentSubscription.Status.CurrentCSV) - fetchedCSV, err := awaitCSV(t, crc, testNamespace, dependentCSV.GetName(), csvAnyChecker) + // Verify CSV is created + _, err = awaitCSV(t, crc, testNamespace, dependentCSV.GetName(), csvAnyChecker) require.NoError(t, err) // Update dependent subscription in catalog and wait for csv to update - updatedDependentCSV := newCSV(dependentPackageStable+"v2", testNamespace, dependentPackageStable, semver.MustParse("0.1.1"), []apiextensions.CustomResourceDefinition{dependentCRD}, nil, dependentNamedStrategy) + updatedDependentCSV := newCSV(dependentPackageStable+"-v2", testNamespace, dependentPackageStable, semver.MustParse("0.1.1"), []apiextensions.CustomResourceDefinition{dependentCRD}, nil, dependentNamedStrategy) dependentManifests = []registry.PackageManifest{ { PackageName: dependentPackageName, @@ -353,15 +354,21 @@ EXPECTED: DefaultChannelName: stableChannel, }, } + updateInternalCatalog(t, c, crc, dependentCatalogName, testNamespace, []apiextensions.CustomResourceDefinition{dependentCRD}, []v1alpha1.ClusterServiceVersion{dependentCSV, updatedDependentCSV}, dependentManifests) - dependentSubscription, err = fetchSubscription(t, crc, testNamespace, strings.Join([]string{dependentPackageStable, dependentCatalogName, testNamespace}, "-"), subscriptionHasCurrentCSV(updatedDependentCSV.GetName())) + // Wait for subscription to update + updatedDepSubscription, err := fetchSubscription(t, crc, testNamespace, strings.Join([]string{dependentPackageStable, dependentCatalogName, testNamespace}, "-"), subscriptionHasCurrentCSV(updatedDependentCSV.GetName())) require.NoError(t, err) - fetchedCSV, err = awaitCSV(t, crc, testNamespace, updatedDependentCSV.GetName(), csvAnyChecker) + // Verify installplan created and installed + fetchedUpdatedDepInstallPlan, err := fetchInstallPlan(t, crc, updatedDepSubscription.Status.InstallPlanRef.Name, buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseComplete)) require.NoError(t, err) + log(fmt.Sprintf("Install plan %s fetched with status %s", fetchedUpdatedDepInstallPlan.GetName(), fetchedUpdatedDepInstallPlan.Status.Phase)) + require.NotEqual(t, fetchedInstallPlan.GetName(), fetchedUpdatedDepInstallPlan.GetName()) - err = crc.OperatorsV1alpha1().ClusterServiceVersions(testNamespace).Delete(fetchedCSV.GetName(), metav1.NewDeleteOptions(0)) + // Wait for csv to update + _, err = awaitCSV(t, crc, testNamespace, updatedDependentCSV.GetName(), csvAnyChecker) require.NoError(t, err) }