Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remediator missing custom resource events #1441

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions cmd/reconciler-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"

"github.com/go-logr/logr"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -33,6 +34,7 @@ import (
"kpt.dev/configsync/pkg/profiler"
"kpt.dev/configsync/pkg/reconcilermanager"
"kpt.dev/configsync/pkg/reconcilermanager/controllers"
"kpt.dev/configsync/pkg/util/customresource"
"kpt.dev/configsync/pkg/util/log"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -95,9 +97,10 @@ func main() {
}
watchFleetMembership := fleetMembershipCRDExists(dynamicClient, mgr.GetRESTMapper(), &setupLog)

crdController := controllers.NewCRDReconciler(
crdController := &controllers.CRDController{}
crdMetaController := controllers.NewCRDMetaController(crdController, mgr.GetCache(),
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName("CRD"))
if err := crdController.Register(mgr); err != nil {
if err := crdMetaController.Register(mgr); err != nil {
setupLog.Error(err, "failed to register controller", "controller", "CRD")
os.Exit(1)
}
Expand All @@ -108,11 +111,15 @@ func main() {
mgr.GetClient(), watcher, dynamicClient,
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName(configsync.RepoSyncKind),
mgr.GetScheme())
crdController.SetCRDHandler(configsync.RepoSyncCRDName, func() error {
if err := repoSyncController.Register(mgr, watchFleetMembership); err != nil {
return fmt.Errorf("registering %s controller: %w", configsync.RepoSyncKind, err)
crdController.SetReconciler(kinds.RepoSyncV1Beta1().GroupKind(), func(_ context.Context, crd *apiextensionsv1.CustomResourceDefinition) error {
if customresource.IsEstablished(crd) {
if err := repoSyncController.Register(mgr, watchFleetMembership); err != nil {
return fmt.Errorf("registering %s controller: %w", configsync.RepoSyncKind, err)
}
setupLog.Info("RepoSync controller registration successful")
}
setupLog.Info("RepoSync controller registration successful")
// Don't stop the RepoSync controller when its CRD is deleted,
// otherwise we may miss RepoSync object deletion events.
return nil
})
setupLog.Info("RepoSync controller registration scheduled")
Expand All @@ -122,11 +129,15 @@ func main() {
mgr.GetClient(), watcher, dynamicClient,
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName(configsync.RootSyncKind),
mgr.GetScheme())
crdController.SetCRDHandler(configsync.RootSyncCRDName, func() error {
if err := rootSyncController.Register(mgr, watchFleetMembership); err != nil {
return fmt.Errorf("registering %s controller: %w", configsync.RootSyncKind, err)
crdController.SetReconciler(kinds.RootSyncV1Beta1().GroupKind(), func(_ context.Context, crd *apiextensionsv1.CustomResourceDefinition) error {
if customresource.IsEstablished(crd) {
if err := rootSyncController.Register(mgr, watchFleetMembership); err != nil {
return fmt.Errorf("registering %s controller: %w", configsync.RootSyncKind, err)
}
setupLog.Info("RootSync controller registration successful")
}
setupLog.Info("RootSync controller registration successful")
// Don't stop the RootSync controller when its CRD is deleted,
// otherwise we may miss RootSync object deletion events.
return nil
})
setupLog.Info("RootSync controller registration scheduled")
Expand Down
17 changes: 10 additions & 7 deletions e2e/testcases/custom_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,16 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {
nt.T.Fatal(err)
}

// Resource Conflict errors from the remediator are not exposed as errors
// in the RootSync status. Instead, the error is recorded as a metric and
// logged as a warning. Then the object is refreshed from the server and
// re-enqueued for remediation.
// Resource conflict errors are recorded as status errors when the
// remediator watches are updated after an apply succeeds, but not when
// watches are updated before the apply attempt or from watch events handled
// by the remediator. So we don't expect to see a resource conflict error
// in the RootSync status until after the next apply attempt fails, which
// won't happen until the next automatic re-sync (1hr default).
//
// Validate that deleting the CRD of a managed CR causes at least one of the
// following errors:
// However, we do expect the remediator to get a deletion event for each
// Anvil object after the Anvil CRD is deleted. This can be surfaced as one
// of the following errors:
// - NoResourceMatchError
// - NoKindMatchError
// - ObjectNotFound
Expand All @@ -117,7 +120,7 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {
// TODO: distinguish between management conflict (unexpected manager annotation) and resource conflict (resource version change)
nt.Must(nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, firstCommitHash, metrics.ErrorSummary{
Conflicts: 1,
Conflicts: 1, // at least 1
})))

// Reset discovery client to invalidate the cached Anvil CRD
Expand Down
1 change: 1 addition & 0 deletions manifests/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ resources:
# Applying hierarchyconfig-crd.yaml allows client-side validation of the HierarchyConfig resources.
- ../hierarchyconfig-crd.yaml
- ../namespace-selector-crd.yaml
- ../ns-reconciler-cluster-scope-cluster-role.yaml
- ../ns-reconciler-base-cluster-role.yaml
- ../root-reconciler-base-cluster-role.yaml
- ../otel-agent-cm.yaml
Expand Down
27 changes: 27 additions & 0 deletions manifests/ns-reconciler-cluster-scope-cluster-role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This ClusterRole is used by both root-reconcilers and ns-reconcilers.
# It includes read access for cluster-scope resources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: configsync.gke.io:ns-reconciler:cluster-scope
labels:
configmanagement.gke.io/system: "true"
configmanagement.gke.io/arch: "csmr"
rules:
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
verbs: ["get","list","watch"]
3 changes: 3 additions & 0 deletions manifests/root-reconciler-base-cluster-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ rules:
- apiGroups: ["kpt.dev"]
resources: ["resourcegroups/status"]
verbs: ["*"]
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
verbs: ["get","list","watch"]
10 changes: 9 additions & 1 deletion pkg/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"kpt.dev/configsync/pkg/parse/events"
"kpt.dev/configsync/pkg/reconciler/finalizer"
"kpt.dev/configsync/pkg/reconciler/namespacecontroller"
"kpt.dev/configsync/pkg/reconcilermanager/controllers"
"kpt.dev/configsync/pkg/remediator"
"kpt.dev/configsync/pkg/remediator/conflict"
"kpt.dev/configsync/pkg/remediator/watch"
Expand Down Expand Up @@ -219,10 +220,11 @@ func Run(opts Options) {
klog.Fatalf("Error creating rest config for the remediator: %v", err)
}

crdController := &controllers.CRDController{}
conflictHandler := conflict.NewHandler()
fightHandler := fight.NewHandler()

rem, err := remediator.New(opts.ReconcilerScope, opts.SyncName, cfgForWatch, baseApplier, conflictHandler, fightHandler, decls, opts.NumWorkers)
rem, err := remediator.New(opts.ReconcilerScope, opts.SyncName, cfgForWatch, baseApplier, conflictHandler, fightHandler, crdController, decls, opts.NumWorkers)
if err != nil {
klog.Fatalf("Instantiating Remediator: %v", err)
}
Expand Down Expand Up @@ -331,6 +333,12 @@ func Run(opts Options) {
klog.Fatalf("Instantiating Controller Manager: %v", err)
}

crdMetaController := controllers.NewCRDMetaController(crdController, mgr.GetCache(),
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName("CRD"))
if err := crdMetaController.Register(mgr); err != nil {
klog.Fatalf("Instantiating CRD Controller: %v", err)
}

// This cancelFunc will be used by the Finalizer to stop all the other
// controllers (Parser & Remediator).
ctx, stopControllers := context.WithCancel(signalCtx)
Expand Down
14 changes: 12 additions & 2 deletions pkg/reconcilermanager/controllers/build_names.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,25 @@ import (
)

const (
// RepoSyncClusterScopeClusterRoleName is the name of the ClusterRole with
// cluster-scoped read permissions for the namespace reconciler.
// e.g. configsync.gke.io:ns-reconciler:cluster-scope
RepoSyncClusterScopeClusterRoleName = configsync.GroupName + ":" + core.NsReconcilerPrefix + ":cluster-scope"
// RepoSyncBaseClusterRoleName is the namespace reconciler permissions name.
// e.g. configsync.gke.io:ns-reconciler
RepoSyncBaseClusterRoleName = configsync.GroupName + ":" + core.NsReconcilerPrefix
// RootSyncBaseClusterRoleName is the root reconciler base ClusterRole name.
// e.g. configsync.gke.io:root-reconciler
RootSyncBaseClusterRoleName = configsync.GroupName + ":" + core.RootReconcilerPrefix
// RepoSyncClusterScopeClusterRoleBindingName is the name of the default
// ClusterRoleBinding created for RepoSync objects. This contains basic
// cluster-scoped permissions for RepoSync reconcilers
// (e.g. CustomResourceDefinition watch).
RepoSyncClusterScopeClusterRoleBindingName = RepoSyncClusterScopeClusterRoleName
// RepoSyncBaseRoleBindingName is the name of the default RoleBinding created
// for RepoSync objects. This contains basic permissions for RepoSync reconcilers
// (e.g. RepoSync status update).
// for RepoSync objects. This contains basic namespace-scoped permissions
// for RepoSync reconcilers
// (e.g. RepoSync status update).
RepoSyncBaseRoleBindingName = RepoSyncBaseClusterRoleName
// RootSyncLegacyClusterRoleBindingName is the name of the legacy ClusterRoleBinding created
// for RootSync objects. It is always bound to cluster-admin.
Expand Down
Loading