OCPBUGS-17157: *: detect when all objects are labelled, restart #3028

Merged
2 changes: 1 addition & 1 deletion cmd/olm/main.go
@@ -173,7 +173,7 @@ func main() {
olm.WithExternalClient(crClient),
olm.WithMetadataClient(metadataClient),
olm.WithOperatorClient(opClient),
olm.WithRestConfig(config),
olm.WithRestConfig(validatingConfig),
olm.WithConfigClient(versionedConfigClient),
olm.WithProtectedCopiedCSVNamespaces(*protectedCopiedCSVNamespaces),
)
34 changes: 34 additions & 0 deletions cmd/olm/manager.go
@@ -3,11 +3,15 @@ package main
import (
"context"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/selection"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -53,6 +57,36 @@ func Manager(ctx context.Context, debug bool) (ctrl.Manager, error) {
MetricsBindAddress: "0", // TODO(njhale): Enable metrics on non-conflicting port (not 8080)
Cache: cache.Options{
ByObject: map[client.Object]cache.ByObject{
&appsv1.Deployment{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&corev1.Service{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&apiextensionsv1.CustomResourceDefinition{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&apiregistrationv1.APIService{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&corev1.ConfigMap{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&corev1.ServiceAccount{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&rbacv1.Role{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&rbacv1.RoleBinding{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&rbacv1.ClusterRole{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&rbacv1.ClusterRoleBinding{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
&corev1.Secret{}: {
Label: labels.SelectorFromValidatedSet(map[string]string{install.OLMManagedLabelKey: install.OLMManagedLabelValue}),
},
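Every entry in this `ByObject` map uses the same label selector, so the manager's cache stores only objects that carry the OLM-managed label. As a minimal sketch (illustrative, not the PR's code), the repetition could be collapsed into a helper; the literal `"olm.managed": "true"` pair below is assumed to match `install.OLMManagedLabelKey`/`install.OLMManagedLabelValue`:

```go
package main

import (
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
	"sigs.k8s.io/controller-runtime/pkg/cache"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// olmManaged mirrors the selector used for every cached type above.
var olmManaged = labels.SelectorFromValidatedSet(labels.Set{"olm.managed": "true"})

// byObject builds the per-type cache config once for every watched type.
func byObject(objs ...client.Object) map[client.Object]cache.ByObject {
	m := make(map[client.Object]cache.ByObject, len(objs))
	for _, o := range objs {
		m[o] = cache.ByObject{Label: olmManaged}
	}
	return m
}

// usage: cache.Options{ByObject: byObject(&appsv1.Deployment{}, &corev1.Service{}, ...)}
var _ = byObject(&appsv1.Deployment{}, &corev1.Service{})
```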
@@ -110,6 +110,10 @@ spec:
fieldPath: metadata.namespace
- name: OPERATOR_NAME
value: olm-operator
{{- if .Values.debug }}
- name: CI
value: "true"
{{- end }}
{{- if .Values.olm.resources }}
resources:
{{ toYaml .Values.olm.resources | indent 12 }}
@@ -90,6 +90,11 @@ spec:
- --set-workload-user-id=false
{{ end }}
image: {{ .Values.catalog.image.ref }}
{{- if .Values.debug }}
env:
- name: CI
value: "true"
{{- end }}
imagePullPolicy: {{ .Values.catalog.image.pullPolicy }}
ports:
- containerPort: {{ .Values.olm.service.internalPort }}
1 change: 1 addition & 0 deletions pkg/controller/install/apiservice.go
@@ -61,6 +61,7 @@ func (i *StrategyDeploymentInstaller) createOrUpdateAPIService(caPEM []byte, des
if err := ownerutil.AddOwnerLabels(apiService, i.owner); err != nil {
return err
}
apiService.Labels[OLMManagedLabelKey] = OLMManagedLabelValue

// Create a service for the deployment
containerPort := int32(443)
12 changes: 10 additions & 2 deletions pkg/controller/install/certresources.go
@@ -456,7 +456,7 @@ func (i *StrategyDeploymentInstaller) installCertRequirementsForDeployment(deplo
Name: "system:auth-delegator",
},
}
authDelegatorClusterRoleBinding.SetName(service.GetName() + "-system:auth-delegator")
authDelegatorClusterRoleBinding.SetName(AuthDelegatorClusterRoleBindingName(service.GetName()))
authDelegatorClusterRoleBinding.SetLabels(map[string]string{OLMManagedLabelKey: OLMManagedLabelValue})

existingAuthDelegatorClusterRoleBinding, err := i.strategyClient.GetOpLister().RbacV1().ClusterRoleBindingLister().Get(authDelegatorClusterRoleBinding.GetName())
@@ -504,7 +504,7 @@ func (i *StrategyDeploymentInstaller) installCertRequirementsForDeployment(deplo
Name: "extension-apiserver-authentication-reader",
},
}
authReaderRoleBinding.SetName(service.GetName() + "-auth-reader")
authReaderRoleBinding.SetName(AuthReaderRoleBindingName(service.GetName()))
authReaderRoleBinding.SetNamespace(KubeSystem)
authReaderRoleBinding.SetLabels(map[string]string{OLMManagedLabelKey: OLMManagedLabelValue})

@@ -543,6 +543,14 @@ func (i *StrategyDeploymentInstaller) installCertRequirementsForDeployment(deplo
return &depSpec, caPEM, nil
}

func AuthDelegatorClusterRoleBindingName(serviceName string) string {
return serviceName + "-system:auth-delegator"
}

func AuthReaderRoleBindingName(serviceName string) string {
return serviceName + "-auth-reader"
}

func SetCAAnnotation(depSpec *appsv1.DeploymentSpec, caHash string) {
if len(depSpec.Template.ObjectMeta.GetAnnotations()) == 0 {
depSpec.Template.ObjectMeta.SetAnnotations(map[string]string{OLMCAHashAnnotationKey: caHash})
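Extracting these names into exported helpers lets other code (for example, anything that needs to find or re-label the bindings OLM created for a service) derive the same names without duplicating the string concatenation. A hypothetical usage sketch, not from the PR:

```go
package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"

	"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
)

// lookupAuthBindings fetches the RBAC objects whose names OLM derives from a
// service name; the helper and its call sites are illustrative only.
func lookupAuthBindings(ctx context.Context, cs kubernetes.Interface, serviceName string) error {
	crb := install.AuthDelegatorClusterRoleBindingName(serviceName)
	if _, err := cs.RbacV1().ClusterRoleBindings().Get(ctx, crb, metav1.GetOptions{}); err != nil {
		return err
	}
	// the auth-reader binding lives in kube-system (KubeSystem in the diff above)
	rb := install.AuthReaderRoleBindingName(serviceName)
	_, err := cs.RbacV1().RoleBindings("kube-system").Get(ctx, rb, metav1.GetOptions{})
	return err
}
```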
2 changes: 1 addition & 1 deletion pkg/controller/install/deployment.go
@@ -152,11 +152,11 @@ func (i *StrategyDeploymentInstaller) deploymentForSpec(name string, spec appsv1
dep.Spec.Template.SetAnnotations(annotations)

// Set custom labels before CSV owner labels
dep.SetLabels(specLabels)
if dep.Labels == nil {
dep.Labels = map[string]string{}
}
dep.Labels[OLMManagedLabelKey] = OLMManagedLabelValue
dep.SetLabels(specLabels)

ownerutil.AddNonBlockingOwner(dep, i.owner)
ownerutil.AddOwnerLabelsForKind(dep, i.owner, v1alpha1.ClusterServiceVersionKind)
1 change: 1 addition & 0 deletions pkg/controller/install/deployment_test.go
@@ -353,6 +353,7 @@ func TestInstallStrategyDeploymentCheckInstallErrors(t *testing.T) {
dep.Spec.Template.SetAnnotations(map[string]string{"test": "annotation"})
dep.Spec.RevisionHistoryLimit = &revisionHistoryLimit
dep.SetLabels(labels.CloneAndAddLabel(dep.ObjectMeta.GetLabels(), DeploymentSpecHashLabelKey, HashDeploymentSpec(dep.Spec)))
dep.Labels[OLMManagedLabelKey] = OLMManagedLabelValue
dep.Status.Conditions = append(dep.Status.Conditions, appsv1.DeploymentCondition{
Type: appsv1.DeploymentAvailable,
Status: corev1.ConditionTrue,
7 changes: 7 additions & 0 deletions pkg/controller/operators/adoption_controller_test.go
@@ -6,6 +6,7 @@ import (

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
@@ -350,6 +351,12 @@ var _ = Describe("Adoption Controller", func() {
),
}
for _, component := range components {
labels := component.GetLabels()
if labels == nil {
labels = map[string]string{}
}
labels[install.OLMManagedLabelKey] = install.OLMManagedLabelValue
component.SetLabels(labels)
Eventually(func() error {
return k8sClient.Create(ctx, component)
}, timeout, interval).Should(Succeed())
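Because the manager's caches now filter every watched type on the OLM-managed label, test fixtures must carry that label before creation or the controllers under test will never observe them. The loop above could be factored into a small helper, sketched here:

```go
package main

import (
	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
)

// ensureOLMManaged adds the olm.managed label in place, creating the label
// map if the fixture has none; an illustrative helper, not part of the PR.
func ensureOLMManaged(obj client.Object) {
	l := obj.GetLabels()
	if l == nil {
		l = map[string]string{}
	}
	l[install.OLMManagedLabelKey] = install.OLMManagedLabelValue
	obj.SetLabels(l)
}
```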
71 changes: 62 additions & 9 deletions pkg/controller/operators/catalog/operator.go
@@ -185,7 +185,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
return nil, err
}

canFilter, err := labeller.Validate(ctx, logger, metadataClient)
canFilter, err := labeller.Validate(ctx, logger, metadataClient, crClient)
if err != nil {
return nil, err
}
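`labeller.Validate` now also receives the CR client; per the PR title, it decides whether every object OLM manages already carries the label, in which case `canFilter` is true and informers can be label-filtered from the start. A rough sketch of that kind of check, under the assumption that it lists each GVR for objects still missing the label (the real implementation may differ):

```go
package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/metadata"
)

// allLabelled reports whether no object of the given GVRs is missing the
// olm.managed label; a hypothetical stand-in for what Validate checks.
func allLabelled(ctx context.Context, m metadata.Interface, gvrs ...schema.GroupVersionResource) (bool, error) {
	for _, gvr := range gvrs {
		list, err := m.Resource(gvr).List(ctx, metav1.ListOptions{
			LabelSelector: "!olm.managed", // objects without the label key
		})
		if err != nil {
			return false, err
		}
		if len(list.Items) > 0 {
			return false, nil // at least one object is still unlabelled
		}
	}
	return true, nil
}
```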
@@ -207,10 +207,10 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
ogQueueSet: queueinformer.NewEmptyResourceQueueSet(),
catalogSubscriberIndexer: map[string]cache.Indexer{},
serviceAccountQuerier: scoped.NewUserDefinedServiceAccountQuerier(logger, crClient),
clientAttenuator: scoped.NewClientAttenuator(logger, config, opClient),
clientAttenuator: scoped.NewClientAttenuator(logger, validatingConfig, opClient),
installPlanTimeout: installPlanTimeout,
bundleUnpackTimeout: bundleUnpackTimeout,
clientFactory: clients.NewFactory(config),
clientFactory: clients.NewFactory(validatingConfig),
}
op.sources = grpc.NewSourceStore(logger, 10*time.Second, 10*time.Minute, op.syncSourceState)
op.sourceInvalidator = resolver.SourceProviderFromRegistryClientProvider(op.sources, logger)
@@ -380,10 +380,25 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
op.lister.RbacV1().RegisterRoleLister(metav1.NamespaceAll, roleInformer.Lister())
sharedIndexInformers = append(sharedIndexInformers, roleInformer.Informer())

labelObjects := func(gvr schema.GroupVersionResource, informer cache.SharedIndexInformer, sync queueinformer.LegacySyncHandler) error {
complete := map[schema.GroupVersionResource][]bool{}
completeLock := &sync.Mutex{}

labelObjects := func(gvr schema.GroupVersionResource, informer cache.SharedIndexInformer, sync func(done func() bool) queueinformer.LegacySyncHandler) error {
if canFilter {
return nil
}

// for each GVR, we may have more than one labelling controller active, each of which detects
// when it is done; we allocate a slot in complete[gvr][idx] to hold that outcome and track it
var idx int
if _, exists := complete[gvr]; exists {
idx = len(complete[gvr])
complete[gvr] = append(complete[gvr], false)
} else {
idx = 0
complete[gvr] = []bool{false}
}

queue := workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{
Name: gvr.String(),
})
@@ -392,7 +407,23 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
queueinformer.WithQueue(queue),
queueinformer.WithLogger(op.logger),
queueinformer.WithInformer(informer),
queueinformer.WithSyncer(sync.ToSyncer()),
queueinformer.WithSyncer(sync(func() bool {
// this function is called by the processor when it detects that its work is done - so, for that
// particular labelling action on that particular GVR, all objects are in the correct state. When
// that action is done, we need to know whether it was the last action to complete, since once
// every action we know about has completed, we restart the process so that the next invocation
// can filter informers (canFilter = true) and elide all of this logic
completeLock.Lock()
complete[gvr][idx] = true
allDone := true
for _, items := range complete {
for _, done := range items {
allDone = allDone && done
(review comment, Member): break early if false?
(reply, Member Author): Yes, but the computation done here is trivial enough that breaking early won't make any meaningful difference in performance and will increase the code length/control flow.
}
}
completeLock.Unlock()
return allDone
}).ToSyncer()),
)
if err != nil {
return err
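The bookkeeping reads more clearly in isolation: each `labelObjects` call claims one slot under its GVR, and the callback handed to the syncer marks that slot done and answers whether every slot across every GVR is now done; that is the signal to restart and come back up with filtered informers. A standalone sketch of the same pattern (names are illustrative):

```go
package main

import (
	"sync"

	"k8s.io/apimachinery/pkg/runtime/schema"
)

// completion tracks, per GVR, one boolean per labelling controller.
type completion struct {
	mu   sync.Mutex
	done map[schema.GroupVersionResource][]bool
}

// register allocates a slot for one labelling controller and returns a
// callback that marks the slot done and reports whether every slot across
// every GVR has now completed.
func (c *completion) register(gvr schema.GroupVersionResource) func() bool {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.done == nil {
		c.done = map[schema.GroupVersionResource][]bool{}
	}
	idx := len(c.done[gvr])
	c.done[gvr] = append(c.done[gvr], false)
	return func() bool {
		c.mu.Lock()
		defer c.mu.Unlock()
		c.done[gvr][idx] = true
		for _, slots := range c.done {
			for _, d := range slots {
				if !d {
					return false
				}
			}
		}
		return true
	}
}
```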
@@ -408,6 +439,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
rolesgvk := rbacv1.SchemeGroupVersion.WithResource("roles")
if err := labelObjects(rolesgvk, roleInformer.Informer(), labeller.ObjectLabeler[*rbacv1.Role, *rbacv1applyconfigurations.RoleApplyConfiguration](
ctx, op.logger, labeller.Filter(rolesgvk),
roleInformer.Lister().List,
rbacv1applyconfigurations.Role,
func(namespace string, ctx context.Context, cfg *rbacv1applyconfigurations.RoleApplyConfiguration, opts metav1.ApplyOptions) (*rbacv1.Role, error) {
return op.opClient.KubernetesInterface().RbacV1().Roles(namespace).Apply(ctx, cfg, opts)
@@ -420,6 +452,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
func(role *rbacv1.Role) (string, error) {
return resolver.PolicyRuleHashLabelValue(role.Rules)
},
roleInformer.Lister().List,
rbacv1applyconfigurations.Role,
func(namespace string, ctx context.Context, cfg *rbacv1applyconfigurations.RoleApplyConfiguration, opts metav1.ApplyOptions) (*rbacv1.Role, error) {
return op.opClient.KubernetesInterface().RbacV1().Roles(namespace).Apply(ctx, cfg, opts)
@@ -436,6 +469,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
rolebindingsgvk := rbacv1.SchemeGroupVersion.WithResource("rolebindings")
if err := labelObjects(rolebindingsgvk, roleBindingInformer.Informer(), labeller.ObjectLabeler[*rbacv1.RoleBinding, *rbacv1applyconfigurations.RoleBindingApplyConfiguration](
ctx, op.logger, labeller.Filter(rolebindingsgvk),
roleBindingInformer.Lister().List,
rbacv1applyconfigurations.RoleBinding,
func(namespace string, ctx context.Context, cfg *rbacv1applyconfigurations.RoleBindingApplyConfiguration, opts metav1.ApplyOptions) (*rbacv1.RoleBinding, error) {
return op.opClient.KubernetesInterface().RbacV1().RoleBindings(namespace).Apply(ctx, cfg, opts)
@@ -448,6 +482,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
func(roleBinding *rbacv1.RoleBinding) (string, error) {
return resolver.RoleReferenceAndSubjectHashLabelValue(roleBinding.RoleRef, roleBinding.Subjects)
},
roleBindingInformer.Lister().List,
rbacv1applyconfigurations.RoleBinding,
func(namespace string, ctx context.Context, cfg *rbacv1applyconfigurations.RoleBindingApplyConfiguration, opts metav1.ApplyOptions) (*rbacv1.RoleBinding, error) {
return op.opClient.KubernetesInterface().RbacV1().RoleBindings(namespace).Apply(ctx, cfg, opts)
@@ -463,7 +498,19 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo

serviceaccountsgvk := corev1.SchemeGroupVersion.WithResource("serviceaccounts")
if err := labelObjects(serviceaccountsgvk, serviceAccountInformer.Informer(), labeller.ObjectLabeler[*corev1.ServiceAccount, *corev1applyconfigurations.ServiceAccountApplyConfiguration](
ctx, op.logger, labeller.Filter(serviceaccountsgvk),
ctx, op.logger, labeller.ServiceAccountFilter(func(namespace, name string) bool {
operatorGroups, err := operatorGroupInformer.Lister().OperatorGroups(namespace).List(labels.Everything())
if err != nil {
return false
}
for _, operatorGroup := range operatorGroups {
if operatorGroup.Spec.ServiceAccountName == name {
return true
}
}
return false
}),
serviceAccountInformer.Lister().List,
corev1applyconfigurations.ServiceAccount,
func(namespace string, ctx context.Context, cfg *corev1applyconfigurations.ServiceAccountApplyConfiguration, opts metav1.ApplyOptions) (*corev1.ServiceAccount, error) {
return op.opClient.KubernetesInterface().CoreV1().ServiceAccounts(namespace).Apply(ctx, cfg, opts)
@@ -480,6 +527,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
servicesgvk := corev1.SchemeGroupVersion.WithResource("services")
if err := labelObjects(servicesgvk, serviceInformer.Informer(), labeller.ObjectLabeler[*corev1.Service, *corev1applyconfigurations.ServiceApplyConfiguration](
ctx, op.logger, labeller.Filter(servicesgvk),
serviceInformer.Lister().List,
corev1applyconfigurations.Service,
func(namespace string, ctx context.Context, cfg *corev1applyconfigurations.ServiceApplyConfiguration, opts metav1.ApplyOptions) (*corev1.Service, error) {
return op.opClient.KubernetesInterface().CoreV1().Services(namespace).Apply(ctx, cfg, opts)
@@ -505,6 +553,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
podsgvk := corev1.SchemeGroupVersion.WithResource("pods")
if err := labelObjects(podsgvk, csPodInformer.Informer(), labeller.ObjectLabeler[*corev1.Pod, *corev1applyconfigurations.PodApplyConfiguration](
ctx, op.logger, labeller.Filter(podsgvk),
csPodInformer.Lister().List,
corev1applyconfigurations.Pod,
func(namespace string, ctx context.Context, cfg *corev1applyconfigurations.PodApplyConfiguration, opts metav1.ApplyOptions) (*corev1.Pod, error) {
return op.opClient.KubernetesInterface().CoreV1().Pods(namespace).Apply(ctx, cfg, opts)
@@ -542,6 +591,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
ctx, op.logger, labeller.JobFilter(func(namespace, name string) (metav1.Object, error) {
return configMapInformer.Lister().ConfigMaps(namespace).Get(name)
}),
jobInformer.Lister().List,
batchv1applyconfigurations.Job,
func(namespace string, ctx context.Context, cfg *batchv1applyconfigurations.JobApplyConfiguration, opts metav1.ApplyOptions) (*batchv1.Job, error) {
return op.opClient.KubernetesInterface().BatchV1().Jobs(namespace).Apply(ctx, cfg, opts)
@@ -617,6 +667,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo
customresourcedefinitionsgvk := apiextensionsv1.SchemeGroupVersion.WithResource("customresourcedefinitions")
if err := labelObjects(customresourcedefinitionsgvk, crdInformer, labeller.ObjectPatchLabeler(
ctx, op.logger, labeller.Filter(customresourcedefinitionsgvk),
crdLister.List,
op.opClient.ApiextensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Patch,
)); err != nil {
return nil, err
@@ -1988,13 +2039,15 @@ func transitionInstallPlanState(log logrus.FieldLogger, transitioner installPlan
}
log.Debug("attempting to install")
if err := transitioner.ExecutePlan(out); err != nil {
if now.Sub(out.Status.StartTime.Time) >= timeout {
if apierrors.IsForbidden(err) || now.Sub(out.Status.StartTime.Time) < timeout {
// forbidden errors are never terminal, since we don't know when a user might
// grant the service account they specified the permissions it is missing
out.Status.Message = fmt.Sprintf("retrying execution due to error: %s", err.Error())
} else {
out.Status.SetCondition(v1alpha1.ConditionFailed(v1alpha1.InstallPlanInstalled,
v1alpha1.InstallPlanReasonComponentFailed, err.Error(), &now))
out.Status.Phase = v1alpha1.InstallPlanPhaseFailed
out.Status.Message = err.Error()
} else {
out.Status.Message = fmt.Sprintf("retrying execution due to error: %s", err.Error())
}
return out, err
} else if !out.Status.NeedsRequeue() {
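The reworked condition treats a forbidden error as always retriable: scoped installs run under a user-provided service account, and the user may grant it the missing permissions at any time, so only non-forbidden errors can time out into a terminal failure. The decision in isolation (a sketch, not the PR's code):

```go
package main

import (
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
)

// retriable mirrors the new condition: forbidden errors never become
// terminal; anything else is retried only until the timeout elapses.
func retriable(err error, started, now time.Time, timeout time.Duration) bool {
	return apierrors.IsForbidden(err) || now.Sub(started) < timeout
}
```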