From 71df193df300f61e77c2a9d55a93c1ecf01d564c Mon Sep 17 00:00:00 2001 From: Abu Kashem Date: Tue, 20 Aug 2019 16:15:14 -0400 Subject: [PATCH] Bug 1744245: recreate of sub should not fail install Back to back delete and recreate of a subscription object causes operator install to fail. How to reproduce: - Create a CatalogSource object - Create a subscription that refers to the CatalogSource above. - Wait for the operator to install successfully. - Update the CatalogSource - Wait for the CatalogSource to become healthy - Delete the Subscription object ( from above ). - Create the Subscription object ( no time delay between delete and create ). Delete and Create can be done one after another, there is no need to make them concurrent. The operator install will fail, Subscription status will have an error condition `ReferencedInstallPlanNotFound`. The new install plan object created by OLM gets deleted by GC. Root cause: - OLM uses a lister to get the list of Subscription(s) in a given namespace and sets the relevant subscriptions(s) found in the list as owner of the installplan object(s). - Because lister uses cache, it will return a deleted subscription until the cache is synced. - The new installplan object may get an owner ref that points to the deleted subscription. - GC garbage collects the deleted subscription and consequently deletes the new InstallPlan. - Subscription reconciler reports that the new InstallPlan object is missing and moves the Subscription to a Failed state. The api audit log has entries that validates that GC is rightfully "deleting" the new InstallPlan object. Fix: - For now, use a direct non-cached client to retrieve the list of Subscription. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1744245 Jira: https://jira.coreos.com/browse/OLM-1245 --- pkg/controller/operators/catalog/operator.go | 19 +++++++++++++++-- pkg/controller/registry/resolver/resolver.go | 21 +++++++++++++++++-- .../registry/resolver/resolver_test.go | 13 ++++++------ 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index 47075e354f..258a2b49b3 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -131,7 +131,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo client: crClient, lister: lister, namespace: operatorNamespace, - resolver: resolver.NewOperatorsV1alpha1Resolver(lister), + resolver: resolver.NewOperatorsV1alpha1Resolver(lister, crClient), catsrcQueueSet: queueinformer.NewEmptyResourceQueueSet(), subQueueSet: queueinformer.NewEmptyResourceQueueSet(), csvProvidedAPIsIndexer: map[string]cache.Indexer{}, @@ -702,7 +702,7 @@ func (o *Operator) syncResolvingNamespace(obj interface{}) error { logger.Debug("checking if subscriptions need update") - subs, err := o.lister.OperatorsV1alpha1().SubscriptionLister().Subscriptions(namespace).List(labels.Everything()) + subs, err := o.listSubscriptions(namespace) if err != nil { logger.WithError(err).Debug("couldn't list subscriptions") return err @@ -1513,6 +1513,20 @@ func (o *Operator) getUpdatedOwnerReferences(refs []metav1.OwnerReference, names return updated, nil } +func (o *Operator) listSubscriptions(namespace string) (subs []*v1alpha1.Subscription, err error) { + list, err := o.client.OperatorsV1alpha1().Subscriptions(namespace).List(metav1.ListOptions{}) + if err != nil { + return + } + + subs = make([]*v1alpha1.Subscription, 0) + for i := range list.Items { + subs = append(subs, &list.Items[i]) + } + + return +} + // competingCRDOwnersExist returns true if there exists a CSV that owns at least one of the given CSVs owned CRDs (that's not the given CSV) func competingCRDOwnersExist(namespace string, csv *v1alpha1.ClusterServiceVersion, existingOwners map[string][]string) (bool, error) { // Attempt to find a pre-existing owner in the namespace for any owned crd @@ -1542,3 +1556,4 @@ func getCSVNameSet(plan *v1alpha1.InstallPlan) map[string]struct{} { return csvNameSet } + diff --git a/pkg/controller/registry/resolver/resolver.go b/pkg/controller/registry/resolver/resolver.go index 6be9881448..b12f377dc6 100644 --- a/pkg/controller/registry/resolver/resolver.go +++ b/pkg/controller/registry/resolver/resolver.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" + "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned" v1alpha1listers "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/listers/operators/v1alpha1" "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorlister" ) @@ -24,14 +25,16 @@ type Resolver interface { type OperatorsV1alpha1Resolver struct { subLister v1alpha1listers.SubscriptionLister csvLister v1alpha1listers.ClusterServiceVersionLister + client versioned.Interface } var _ Resolver = &OperatorsV1alpha1Resolver{} -func NewOperatorsV1alpha1Resolver(lister operatorlister.OperatorLister) *OperatorsV1alpha1Resolver { +func NewOperatorsV1alpha1Resolver(lister operatorlister.OperatorLister, client versioned.Interface) *OperatorsV1alpha1Resolver { return &OperatorsV1alpha1Resolver{ subLister: lister.OperatorsV1alpha1().SubscriptionLister(), csvLister: lister.OperatorsV1alpha1().ClusterServiceVersionLister(), + client: client, } } @@ -55,7 +58,7 @@ func (r *OperatorsV1alpha1Resolver) ResolveSteps(namespace string, sourceQuerier } } - subs, err := r.subLister.Subscriptions(namespace).List(labels.Everything()) + subs, err := r.listSubscriptions(namespace) if err != nil { return nil, nil, err } @@ -168,3 +171,17 @@ func (r *OperatorsV1alpha1Resolver) sourceInfoToSubscriptions(subs []*v1alpha1.S } return } + +func (r *OperatorsV1alpha1Resolver) listSubscriptions(namespace string) (subs []*v1alpha1.Subscription, err error) { + list, err := r.client.OperatorsV1alpha1().Subscriptions(namespace).List(metav1.ListOptions{}) + if err != nil { + return + } + + subs = make([]*v1alpha1.Subscription, 0) + for i := range list.Items { + subs = append(subs, &list.Items[i]) + } + + return +} diff --git a/pkg/controller/registry/resolver/resolver_test.go b/pkg/controller/registry/resolver/resolver_test.go index 2fea356902..4be52d0380 100644 --- a/pkg/controller/registry/resolver/resolver_test.go +++ b/pkg/controller/registry/resolver/resolver_test.go @@ -14,6 +14,7 @@ import ( "k8s.io/client-go/tools/cache" "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" + "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned" "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned/fake" "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/informers/externalversions" "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install" @@ -375,12 +376,12 @@ func TestNamespaceResolver(t *testing.T) { for _, steps := range tt.out.steps { expectedSteps = append(expectedSteps, steps...) } - informerFactory, _ := StartResolverInformers(namespace, stopc, tt.clusterState...) + clientFake, informerFactory, _ := StartResolverInformers(namespace, stopc, tt.clusterState...) lister := operatorlister.NewLister() lister.OperatorsV1alpha1().RegisterSubscriptionLister(namespace, informerFactory.Operators().V1alpha1().Subscriptions().Lister()) lister.OperatorsV1alpha1().RegisterClusterServiceVersionLister(namespace, informerFactory.Operators().V1alpha1().ClusterServiceVersions().Lister()) - resolver := NewOperatorsV1alpha1Resolver(lister) + resolver := NewOperatorsV1alpha1Resolver(lister, clientFake) steps, subs, err := resolver.ResolveSteps(namespace, tt.querier) require.Equal(t, tt.out.err, err) t.Logf("%#v", steps) @@ -448,12 +449,12 @@ func TestNamespaceResolverRBAC(t *testing.T) { for _, steps := range tt.out.steps { expectedSteps = append(expectedSteps, steps...) } - informerFactory, _ := StartResolverInformers(namespace, stopc, tt.clusterState...) + clientFake, informerFactory, _ := StartResolverInformers(namespace, stopc, tt.clusterState...) lister := operatorlister.NewLister() lister.OperatorsV1alpha1().RegisterSubscriptionLister(namespace, informerFactory.Operators().V1alpha1().Subscriptions().Lister()) lister.OperatorsV1alpha1().RegisterClusterServiceVersionLister(namespace, informerFactory.Operators().V1alpha1().ClusterServiceVersions().Lister()) - resolver := NewOperatorsV1alpha1Resolver(lister) + resolver := NewOperatorsV1alpha1Resolver(lister, clientFake) querier := NewFakeSourceQuerier(map[CatalogKey][]*opregistry.Bundle{catalog: tt.bundlesInCatalog}) steps, subs, err := resolver.ResolveSteps(namespace, querier) require.Equal(t, tt.out.err, err) @@ -465,7 +466,7 @@ func TestNamespaceResolverRBAC(t *testing.T) { // Helpers for resolver tests -func StartResolverInformers(namespace string, stopCh <-chan struct{}, objs ...runtime.Object) (externalversions.SharedInformerFactory, []cache.InformerSynced) { +func StartResolverInformers(namespace string, stopCh <-chan struct{}, objs ...runtime.Object) (versioned.Interface, externalversions.SharedInformerFactory, []cache.InformerSynced) { // Create client fakes clientFake := fake.NewSimpleClientset(objs...) @@ -484,7 +485,7 @@ func StartResolverInformers(namespace string, stopCh <-chan struct{}, objs ...ru panic("failed to wait for caches to sync") } - return nsInformerFactory, hasSyncedCheckFns + return clientFake, nsInformerFactory, hasSyncedCheckFns } func newSub(namespace, pkg, channel string, catalog CatalogKey) *v1alpha1.Subscription {