Skip to content

Commit

Permalink
Use a single cache for all dynamic controllers
Browse files Browse the repository at this point in the history
Crossplane uses a controller engine to dynamically start claim and XR
controllers when a new XRD is installed.

Before this commit, each controller gets at least one cache. This is
because when I built this functionality, you couldn't stop a single
informer within a cache (a cache is basically a map of informers by
GVK).

When realtime composition is enabled, there are even more caches. One
per composed resource GVK. A GVK routed cache routes cache lookups to
these various delegate caches.

Meanwhile, controller-runtime recently made it possible to stop an
informer within a cache. It's also been possible to remove an event
handler from an informer for some time (since Kubernetes 1.26).

kubernetes-sigs/controller-runtime#2285
kubernetes-sigs/controller-runtime#2046

This commit uses a single client, backed by a single cache, across all
dynamic controllers (specifically the definition, offered, claim, and
XR controllers).

Compared to the current implementation, this commit:

* Takes fewer global locks when realtime compositions are enabled.
  Locking is now mostly at the controller scope.
* Works with the breaking changes to source.Source introduced in
  controller-runtime v0.18. :)

I think this makes the realtime composition code a little easier to
follow by consolodating it into the ControllerEngine, but that's
pretty subjective.

Signed-off-by: Nic Cope <[email protected]>
  • Loading branch information
negz committed May 20, 2024
1 parent b756bd1 commit 5ce3edd
Show file tree
Hide file tree
Showing 32 changed files with 4,761 additions and 2,977 deletions.
92 changes: 90 additions & 2 deletions cmd/crossplane/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"context"
"crypto/tls"
"fmt"
"io"
"os"
"path/filepath"
"time"
Expand All @@ -30,6 +31,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/rest"
kcache "k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -44,11 +46,13 @@ import (
"github.com/crossplane/crossplane-runtime/pkg/feature"
"github.com/crossplane/crossplane-runtime/pkg/logging"
"github.com/crossplane/crossplane-runtime/pkg/ratelimiter"
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured"

"github.com/crossplane/crossplane/internal/controller/apiextensions"
apiextensionscontroller "github.com/crossplane/crossplane/internal/controller/apiextensions/controller"
"github.com/crossplane/crossplane/internal/controller/pkg"
pkgcontroller "github.com/crossplane/crossplane/internal/controller/pkg/controller"
"github.com/crossplane/crossplane/internal/engine"
"github.com/crossplane/crossplane/internal/features"
"github.com/crossplane/crossplane/internal/initializer"
"github.com/crossplane/crossplane/internal/metrics"
Expand Down Expand Up @@ -134,6 +138,8 @@ func (c *startCommand) Run(s *runtime.Scheme, log logging.Logger) error { //noli
Deduplicate: true,
})

// The claim and XR controllers don't use the manager's cache or client.
// They use their own. They're setup later in this method.
eb := record.NewBroadcaster()
mgr, err := ctrl.NewManager(ratelimiter.LimitRESTConfig(cfg, c.MaxReconcileRate), ctrl.Options{
Scheme: s,
Expand Down Expand Up @@ -270,9 +276,91 @@ func (c *startCommand) Run(s *runtime.Scheme, log logging.Logger) error { //noli
log.Info("Alpha feature enabled", "flag", features.EnableAlphaClaimSSA)
}

// Claim and XR controllers are started and stopped dynamically by the
// ControllerEngine below. When realtime compositions are enabled, they also
// start and stop their watches (e.g. of composed resources) dynamically. To
// do this, the ControllerEngine must have exclusive ownership of a cache.
// This allows it to track what controllers are using the cache's informers.
ca, err := cache.New(mgr.GetConfig(), cache.Options{
HTTPClient: mgr.GetHTTPClient(),
Scheme: mgr.GetScheme(),
Mapper: mgr.GetRESTMapper(),
SyncPeriod: &c.SyncInterval,

// When a CRD is deleted, any informers for its GVKs will start trying
// to restart their watches, and fail with scary errors. This should
// only happen when realtime composition is enabled, and we should GC
// the informer within 60 seconds. This handler tries to make the error
// a little more informative, and less scary.
DefaultWatchErrorHandler: func(_ *kcache.Reflector, err error) {
if errors.Is(io.EOF, err) {
// Watch closed normally.
return
}
log.Debug("Watch error - probably due to CRD being uninstalled", "error", err)
},
})
if err != nil {
return errors.Wrap(err, "cannot create cache for API extension controllers")
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
// Don't start the cache until the manager is elected.
<-mgr.Elected()

if err := ca.Start(ctx); err != nil {
log.Info("API extensions cache returned an error", "error", err)
}

log.Info("API extensions cache stopped")
}()

cl, err := client.New(mgr.GetConfig(), client.Options{
HTTPClient: mgr.GetHTTPClient(),
Scheme: mgr.GetScheme(),
Mapper: mgr.GetRESTMapper(),
Cache: &client.CacheOptions{
Reader: ca,

// Don't cache secrets - there may be a lot of them.
DisableFor: []client.Object{&corev1.Secret{}},

// Cache unstructured resources (like XRs and MRs) on Get and List.
Unstructured: true,
},
})
if err != nil {
return errors.Wrap(err, "cannot create client for API extension controllers")
}

// It's important the engine's client is wrapped with unstructured.NewClient
// because controller-runtime always caches *unstructured.Unstructured, not
// our wrapper types like *composite.Unstructured. This client takes care of
// automatically wrapping and unwrapping *unstructured.Unstructured.
ce := engine.New(mgr,
engine.TrackInformers(ca, mgr.GetScheme()),
unstructured.NewClient(cl),
engine.WithLogger(log),
)

// TODO(negz): Garbage collect informers for CRs that are still defined
// (i.e. still have CRDs) but aren't used? Currently if an XR starts
// composing a kind of CR then stops, we won't stop the unused informer
// until the CRD that defines the CR is deleted. That could never happen.
// Consider for example composing two types of MR from the same provider,
// then updating to compose only one.

// Garbage collect informers for custom resources when their CRD is deleted.
if err := ce.GarbageCollectCustomResourceInformers(ctx); err != nil {
return errors.Wrap(err, "cannot start garbage collector for custom resource informers")
}

ao := apiextensionscontroller.Options{
Options: o,
FunctionRunner: functionRunner,
Options: o,
ControllerEngine: ce,
FunctionRunner: functionRunner,
}

if err := apiextensions.Setup(mgr, ao); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ require (
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.1
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
Expand All @@ -151,7 +151,7 @@ require (
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20230516205744-dbecb1de8cfa // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0
github.com/google/uuid v1.6.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
Expand Down
13 changes: 1 addition & 12 deletions internal/controller/apiextensions/claim/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1"
Expand All @@ -37,7 +36,6 @@ import (
"github.com/crossplane/crossplane-runtime/pkg/meta"
"github.com/crossplane/crossplane-runtime/pkg/reconciler/managed"
"github.com/crossplane/crossplane-runtime/pkg/resource"
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured"
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured/claim"
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured/composite"

Expand Down Expand Up @@ -221,14 +219,6 @@ func defaultCRClaim(c client.Client) crClaim {
// A ReconcilerOption configures a Reconciler.
type ReconcilerOption func(*Reconciler)

// WithClient specifies how the Reconciler should interact with the Kubernetes
// API.
func WithClient(c client.Client) ReconcilerOption {
return func(r *Reconciler) {
r.client = c
}
}

// WithManagedFieldsUpgrader specifies how the Reconciler should upgrade claim
// and composite resource (XR) managed fields from client-side apply to
// server-side apply.
Expand Down Expand Up @@ -300,8 +290,7 @@ func WithPollInterval(after time.Duration) ReconcilerOption {
// The returned Reconciler will apply only the ObjectMetaConfigurator by
// default; most callers should supply one or more CompositeConfigurators to
// configure their composite resources.
func NewReconciler(m manager.Manager, of resource.CompositeClaimKind, with resource.CompositeKind, o ...ReconcilerOption) *Reconciler {
c := unstructured.NewClient(m.GetClient())
func NewReconciler(c client.Client, of resource.CompositeClaimKind, with resource.CompositeKind, o ...ReconcilerOption) *Reconciler {
r := &Reconciler{
client: c,
gvkClaim: schema.GroupVersionKind(of),
Expand Down
Loading

0 comments on commit 5ce3edd

Please sign in to comment.