Skip to content

Commit

Permalink
feat(tracker): allows to define ownerReferences for FeatureTrackers (#…
Browse files Browse the repository at this point in the history
…1228)

* feat(tracker): feature tracker can have ownerReferences

Introduces ability to define owner reference for FeatureTracker (internal
custom resource). This enables garbage collection of resources belonging
to the given Feature when its owner such as DSC or DSCI has been removed.

When Features are grouped through means of `FeatureHandler`s, the
ownership is defined under the hood, linking it to DSCI or DSC,
depending on the type used.

In addition, this PR simplifies how Feature relies on `client.Client`
instance. Instead of creating its own instance it expects to have one
passed as part of the builder call chain. It creates a default one otherwise.
With that we can rely on a client configured for controllers with the
right schemas, caching rules etc. As a consequence the change of retry
logic for status needed to be adjusted. Besides a conflict it needs to be
triggered on NotFound error, as shared client's cache might not have yet
an instance of FeatureTracker created just before the condition is
reported. The reason why it was working before is the fact that Feature
has its own simple client instance w/o caching.

> [!IMPORTANT]
>
> With DSCI and DSC becoming owners of particular FeatureTrackers
> `func (c *Component) Cleanup` called in finalizer block becomes
> somewhat speculative. The only use case where custom function is
> invoked is unpatching SMCP authorization provider. This was based
> on early assumption that we might want to plug into customer's
> existing SMCP instance. It's unclear to me if this is still long-term
> thinking so we might want to revisit need for this function.
>
> From the completeness point of view, if we allow creating/manipulating
> resources programmatically as part of the Feature DSL, we should be able
> to register cleanup counter-part functions, especially if we cannot simply
> remove them (as this is handled through ownership of associated
> FeatureTracker).
>
> There is, however, a need to perform cleanup when particular
> component is "Removed" (not managed anymore). Right now this is
> handled inside its Reconcile function.

Fixes [RHOAIENG-8629](https://issues.redhat.com/browse/RHOAIENG-8629)

Relates to #1192 (comment)

Co-authored-by: Bartosz Majsak <[email protected]>

* fix: no need to return dsci

---------

Co-authored-by: Cameron Garrison <[email protected]>
  • Loading branch information
bartoszmajsak and cam-garrison authored Sep 13, 2024
1 parent 562efd1 commit 10dd554
Show file tree
Hide file tree
Showing 23 changed files with 260 additions and 170 deletions.
4 changes: 2 additions & 2 deletions components/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (c *Component) GetManagementState() operatorv1.ManagementState {
return c.ManagementState
}

func (c *Component) Cleanup(_ context.Context, _ client.Client, _ *dsciv1.DSCInitializationSpec) error {
func (c *Component) Cleanup(_ context.Context, _ client.Client, _ metav1.Object, _ *dsciv1.DSCInitializationSpec) error {
// noop
return nil
}
Expand Down Expand Up @@ -80,7 +80,7 @@ type ManifestsConfig struct {
type ComponentInterface interface {
ReconcileComponent(ctx context.Context, cli client.Client, logger logr.Logger,
owner metav1.Object, DSCISpec *dsciv1.DSCInitializationSpec, platform cluster.Platform, currentComponentStatus bool) error
Cleanup(ctx context.Context, cli client.Client, DSCISpec *dsciv1.DSCInitializationSpec) error
Cleanup(ctx context.Context, cli client.Client, owner metav1.Object, DSCISpec *dsciv1.DSCInitializationSpec) error
GetComponentName() string
GetManagementState() operatorv1.ManagementState
OverrideManifests(ctx context.Context, platform cluster.Platform) error
Expand Down
12 changes: 6 additions & 6 deletions components/kserve/kserve.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ func (k *Kserve) ReconcileComponent(ctx context.Context, cli client.Client,
monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed

if !enabled {
if err := k.removeServerlessFeatures(ctx, dscispec); err != nil {
if err := k.removeServerlessFeatures(ctx, cli, owner, dscispec); err != nil {
return err
}
} else {
// Configure dependencies
if err := k.configureServerless(ctx, cli, l, dscispec); err != nil {
if err := k.configureServerless(ctx, cli, l, owner, dscispec); err != nil {
return err
}
if k.DevFlags != nil {
Expand All @@ -123,7 +123,7 @@ func (k *Kserve) ReconcileComponent(ctx context.Context, cli client.Client,
}
}

if err := k.configureServiceMesh(ctx, cli, dscispec); err != nil {
if err := k.configureServiceMesh(ctx, cli, owner, dscispec); err != nil {
return fmt.Errorf("failed configuring service mesh while reconciling kserve component. cause: %w", err)
}

Expand Down Expand Up @@ -177,10 +177,10 @@ func (k *Kserve) ReconcileComponent(ctx context.Context, cli client.Client,
return nil
}

func (k *Kserve) Cleanup(ctx context.Context, cli client.Client, instance *dsciv1.DSCInitializationSpec) error {
if removeServerlessErr := k.removeServerlessFeatures(ctx, instance); removeServerlessErr != nil {
func (k *Kserve) Cleanup(ctx context.Context, cli client.Client, owner metav1.Object, instance *dsciv1.DSCInitializationSpec) error {
if removeServerlessErr := k.removeServerlessFeatures(ctx, cli, owner, instance); removeServerlessErr != nil {
return removeServerlessErr
}

return k.removeServiceMeshConfigurations(ctx, cli, instance)
return k.removeServiceMeshConfigurations(ctx, cli, owner, instance)
}
11 changes: 6 additions & 5 deletions components/kserve/kserve_config_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/hashicorp/go-multierror"
operatorv1 "github.com/openshift/api/operator/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1"
Expand Down Expand Up @@ -118,14 +119,14 @@ func (k *Kserve) setDefaultDeploymentMode(ctx context.Context, cli client.Client
return nil
}

func (k *Kserve) configureServerless(ctx context.Context, cli client.Client, logger logr.Logger, instance *dsciv1.DSCInitializationSpec) error {
func (k *Kserve) configureServerless(ctx context.Context, cli client.Client, logger logr.Logger, owner metav1.Object, instance *dsciv1.DSCInitializationSpec) error {
switch k.Serving.ManagementState {
case operatorv1.Unmanaged: // Bring your own CR
logger.Info("Serverless CR is not configured by the operator, we won't do anything")

case operatorv1.Removed: // we remove serving CR
logger.Info("existing Serverless CR (owned by operator) will be removed")
if err := k.removeServerlessFeatures(ctx, instance); err != nil {
if err := k.removeServerlessFeatures(ctx, cli, owner, instance); err != nil {
return err
}

Expand All @@ -147,7 +148,7 @@ func (k *Kserve) configureServerless(ctx context.Context, cli client.Client, log
return dependOpsErrors
}

serverlessFeatures := feature.ComponentFeaturesHandler(k.GetComponentName(), instance.ApplicationsNamespace, k.configureServerlessFeatures(instance))
serverlessFeatures := feature.ComponentFeaturesHandler(cli, owner, k.GetComponentName(), instance.ApplicationsNamespace, k.configureServerlessFeatures(instance))

if err := serverlessFeatures.Apply(ctx); err != nil {
return err
Expand All @@ -156,8 +157,8 @@ func (k *Kserve) configureServerless(ctx context.Context, cli client.Client, log
return nil
}

func (k *Kserve) removeServerlessFeatures(ctx context.Context, instance *dsciv1.DSCInitializationSpec) error {
serverlessFeatures := feature.ComponentFeaturesHandler(k.GetComponentName(), instance.ApplicationsNamespace, k.configureServerlessFeatures(instance))
func (k *Kserve) removeServerlessFeatures(ctx context.Context, cli client.Client, owner metav1.Object, instance *dsciv1.DSCInitializationSpec) error {
serverlessFeatures := feature.ComponentFeaturesHandler(cli, owner, k.GetComponentName(), instance.ApplicationsNamespace, k.configureServerlessFeatures(instance))

return serverlessFeatures.Delete(ctx)
}
Expand Down
11 changes: 6 additions & 5 deletions components/kserve/servicemesh_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"path"

operatorv1 "github.com/openshift/api/operator/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand All @@ -16,22 +17,22 @@ import (
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/feature/servicemesh"
)

func (k *Kserve) configureServiceMesh(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec) error {
func (k *Kserve) configureServiceMesh(ctx context.Context, cli client.Client, owner metav1.Object, dscispec *dsciv1.DSCInitializationSpec) error {
if dscispec.ServiceMesh != nil {
if dscispec.ServiceMesh.ManagementState == operatorv1.Managed && k.GetManagementState() == operatorv1.Managed {
serviceMeshInitializer := feature.ComponentFeaturesHandler(k.GetComponentName(), dscispec.ApplicationsNamespace, k.defineServiceMeshFeatures(ctx, cli, dscispec))
serviceMeshInitializer := feature.ComponentFeaturesHandler(cli, owner, k.GetComponentName(), dscispec.ApplicationsNamespace, k.defineServiceMeshFeatures(ctx, cli, dscispec))
return serviceMeshInitializer.Apply(ctx)
}
if dscispec.ServiceMesh.ManagementState == operatorv1.Unmanaged && k.GetManagementState() == operatorv1.Managed {
return nil
}
}

return k.removeServiceMeshConfigurations(ctx, cli, dscispec)
return k.removeServiceMeshConfigurations(ctx, cli, owner, dscispec)
}

func (k *Kserve) removeServiceMeshConfigurations(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec) error {
serviceMeshInitializer := feature.ComponentFeaturesHandler(k.GetComponentName(), dscispec.ApplicationsNamespace, k.defineServiceMeshFeatures(ctx, cli, dscispec))
func (k *Kserve) removeServiceMeshConfigurations(ctx context.Context, cli client.Client, owner metav1.Object, dscispec *dsciv1.DSCInitializationSpec) error {
serviceMeshInitializer := feature.ComponentFeaturesHandler(cli, owner, k.GetComponentName(), dscispec.ApplicationsNamespace, k.defineServiceMeshFeatures(ctx, cli, dscispec))
return serviceMeshInitializer.Delete(ctx)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R
}
}
for _, component := range allComponents {
if err := component.Cleanup(ctx, r.Client, r.DataScienceCluster.DSCISpec); err != nil {
if err := component.Cleanup(ctx, r.Client, instance, r.DataScienceCluster.DSCISpec); err != nil {
return ctrl.Result{}, err
}
}
Expand Down Expand Up @@ -188,7 +188,7 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R
} else {
r.Log.Info("Finalization DataScienceCluster start deleting instance", "name", instance.Name, "finalizer", finalizerName)
for _, component := range allComponents {
if err := component.Cleanup(ctx, r.Client, r.DataScienceCluster.DSCISpec); err != nil {
if err := component.Cleanup(ctx, r.Client, instance, r.DataScienceCluster.DSCISpec); err != nil {
return ctrl.Result{}, err
}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/dscinitialization/servicemesh_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (r *DSCInitializationReconciler) removeServiceMesh(ctx context.Context, ins

func (r *DSCInitializationReconciler) serviceMeshCapability(instance *dsciv1.DSCInitialization, initialCondition *conditionsv1.Condition) *feature.HandlerWithReporter[*dsciv1.DSCInitialization] { //nolint:lll // Reason: generics are long
return feature.NewHandlerWithReporter(
feature.ClusterFeaturesHandler(instance, r.serviceMeshCapabilityFeatures(instance)),
feature.ClusterFeaturesHandler(r.Client, instance, r.serviceMeshCapabilityFeatures(instance)),
createCapabilityReporter(r.Client, instance, initialCondition),
)
}
Expand Down Expand Up @@ -119,7 +119,7 @@ func (r *DSCInitializationReconciler) authorizationCapability(ctx context.Contex
}

return feature.NewHandlerWithReporter(
feature.ClusterFeaturesHandler(instance, r.authorizationFeatures(instance)),
feature.ClusterFeaturesHandler(r.Client, instance, r.authorizationFeatures(instance)),
createCapabilityReporter(r.Client, instance, condition),
), nil
}
Expand Down
10 changes: 8 additions & 2 deletions controllers/status/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"

k8serr "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/util/retry"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -44,17 +45,22 @@ func UpdateWithRetry[T client.Object](ctx context.Context, cli client.Client, or
if !ok {
return *new(T), errors.New("failed to deep copy object")
}
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err := retry.OnError(retry.DefaultBackoff, retryOnNotFoundOrConflict, func() error {
if err := cli.Get(ctx, client.ObjectKeyFromObject(original), saved); err != nil {
return err
}

update(saved)

// Return err itself here (not wrapped inside another error)
// so that RetryOnConflict can identify it correctly.
// so that Retry can identify it correctly.
return cli.Status().Update(ctx, saved)
})

return saved, err
}

// retryOnNotFoundOrConflict reports whether a status update attempt should be
// retried. Conflicts are retried as usual; NotFound is also retried because the
// shared client's cache can lag behind the API server, so a freshly created
// object may not be visible yet when its status is first reported.
func retryOnNotFoundOrConflict(err error) bool {
	switch {
	case k8serr.IsConflict(err):
		return true
	case k8serr.IsNotFound(err):
		return true
	default:
		return false
	}
}
5 changes: 5 additions & 0 deletions pkg/cluster/gvk/gvk.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ var (
Version: "v1",
Kind: "DataScienceCluster",
}
DSCInitialization = schema.GroupVersionKind{
Group: "dscinitialization.opendatahub.io",
Version: "v1",
Kind: "DSCInitialization",
}

Deployment = schema.GroupVersionKind{
Group: "apps",
Expand Down
2 changes: 2 additions & 0 deletions pkg/cluster/meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ func WithOwnerReference(ownerReferences ...metav1.OwnerReference) MetaOptions {
}
}

// OwnedBy sets the owner reference for the object being created. It requires scheme to be passed
// as TypeMeta might not be set for the owning object, see: https://github.com/kubernetes-sigs/controller-runtime/issues/1517
func OwnedBy(owner metav1.Object, scheme *runtime.Scheme) MetaOptions {
return func(obj metav1.Object) error {
return controllerutil.SetOwnerReference(owner, obj, scheme)
Expand Down
85 changes: 45 additions & 40 deletions pkg/feature/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"fmt"

"github.com/hashicorp/go-multierror"
ofapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
"github.com/pkg/errors"
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -24,9 +24,10 @@ type featureBuilder struct {
featureName string
managed bool
source featurev1.Source
owner metav1.Object
targetNs string

config *rest.Config
client client.Client

builders []partialBuilder
}
Expand Down Expand Up @@ -94,6 +95,14 @@ func (fb *featureBuilder) Manifests(creators ...resource.Creator) *featureBuilde
return fb
}

// OwnedBy optionally records the owning object for this feature; the owner is
// later used to set an ownerReference on the corresponding FeatureTracker,
// enabling garbage collection of the feature's resources when the owner is
// deleted.
func (fb *featureBuilder) OwnedBy(object metav1.Object) *featureBuilder {
	fb.owner = object
	return fb
}

// Managed marks the feature as managed by the operator. This effectively marks all resources which are part of this feature
// as those that should be updated on operator reconcile.
// Managed marks the feature as managed by the operator.
Expand Down Expand Up @@ -184,6 +193,12 @@ func (fb *featureBuilder) OnDelete(cleanups ...CleanupFunc) *featureBuilder {
return fb
}

// UsingClient supplies a custom client for the feature to use (e.g. the
// controller-configured client with the right schemes and caching rules).
// When omitted, a default client is created during Create.
func (fb *featureBuilder) UsingClient(cli client.Client) *featureBuilder {
	fb.client = cli

	return fb
}

// Create creates a new Feature instance and add it to corresponding FeaturesHandler.
// The actual feature creation in the cluster is not performed here.
func (fb *featureBuilder) Create() (*Feature, error) {
Expand All @@ -197,18 +212,16 @@ func (fb *featureBuilder) Create() (*Feature, error) {
Enabled: alwaysEnabled,
Log: log.Log.WithName("features").WithValues("feature", fb.featureName),
source: &fb.source,
owner: fb.owner,
}

// UsingConfig builder wasn't called while constructing this feature.
// Get default settings and create needed clients.
if fb.config == nil {
if err := fb.withDefaultClient(); err != nil {
// UsingClient has not been called, so we need to create a new client
if fb.client == nil {
if err := createDefaultClient()(f); err != nil {
return nil, err
}
}

if err := createClient(fb.config)(f); err != nil {
return nil, err
} else {
f.Client = fb.client
}

for i := range fb.builders {
Expand All @@ -220,46 +233,38 @@ func (fb *featureBuilder) Create() (*Feature, error) {
return f, nil
}

// UsingConfig allows to pass a custom rest.Config to the feature. Useful for testing.
func (fb *featureBuilder) UsingConfig(config *rest.Config) *featureBuilder {
fb.config = config
return fb
}

func createClient(config *rest.Config) partialBuilder {
func createDefaultClient() partialBuilder {
return func(f *Feature) error {
var err error

f.Client, err = client.New(config, client.Options{})
restCfg, err := config.GetConfig()
if errors.Is(err, rest.ErrNotInCluster) {
// rollback to local kubeconfig - this can be helpful when running the process locally i.e. while debugging
kubeconfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
&clientcmd.ClientConfigLoadingRules{ExplicitPath: clientcmd.RecommendedHomeFile},
&clientcmd.ConfigOverrides{},
)

restCfg, err = kubeconfig.ClientConfig()
if err != nil {
return err
}
} else if err != nil {
return err
}

f.Client, err = client.New(restCfg, client.Options{})
if err != nil {
return errors.WithStack(err)
}

var multiErr *multierror.Error
s := f.Client.Scheme()
multiErr = multierror.Append(multiErr, featurev1.AddToScheme(s), apiextv1.AddToScheme(s), ofapiv1alpha1.AddToScheme(s))

return multiErr.ErrorOrNil()
}
}

func (fb *featureBuilder) withDefaultClient() error {
restCfg, err := config.GetConfig()
if errors.Is(err, rest.ErrNotInCluster) {
// rollback to local kubeconfig - this can be helpful when running the process locally i.e. while debugging
kubeconfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
&clientcmd.ClientConfigLoadingRules{ExplicitPath: clientcmd.RecommendedHomeFile},
&clientcmd.ConfigOverrides{},
multiErr = multierror.Append(multiErr,
featurev1.AddToScheme(s),
apiextv1.AddToScheme(s),
)

restCfg, err = kubeconfig.ClientConfig()
if err != nil {
return err
}
} else if err != nil {
return err
return multiErr.ErrorOrNil()
}

fb.config = restCfg
return nil
}
1 change: 1 addition & 0 deletions pkg/feature/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type Feature struct {

tracker *featurev1.FeatureTracker
source *featurev1.Source
owner metav1.Object

data map[string]any

Expand Down
Loading

0 comments on commit 10dd554

Please sign in to comment.