Skip to content

Commit

Permalink
Allow finalizers to be removed when deleting CRDs
Browse files Browse the repository at this point in the history
See kubernetes-retired#824. When the CRDs are being deleted, we need to continue
reconciling the CRs until they're gone in order to ensure that any
finalizers on the CRs are removed.

Tested: new test script usually fails without this fix, but reliably
passes with it. Observed the new log messages working as expected while
running the script. For subnamespace anchors, tried removing the
CRD-checking code entirely and verified that
hack/test-delete-anchor-crd.sh failed, and then replaced it with the
current code (the shared isDeletingCRD function) and verified that it
passed again.
  • Loading branch information
adrianludwin committed Jul 2, 2020
1 parent efa6555 commit 7a4d3cb
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 51 deletions.
11 changes: 9 additions & 2 deletions incubator/hnc/api/v1alpha1/hierarchy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
CannotUpdate Code = "CannotUpdateObject"
CritAncestor Code = "CritAncestor"
CritCycle Code = "CritCycle"
CritDeletingCRD Code = "CritDeletingCRD"
CritParentMissing Code = "CritParentMissing"
SubnamespaceAnchorMissing Code = "SubnamespaceAnchorMissing"
)
Expand All @@ -57,6 +58,7 @@ var AllCodes = []Code{
CannotUpdate,
CritAncestor,
CritCycle,
CritDeletingCRD,
CritParentMissing,
SubnamespaceAnchorMissing,
}
Expand Down Expand Up @@ -165,6 +167,10 @@ type Condition struct {
// parent is namespace A, but namespace A says that its parent is namespace B, then A and B are in
// a cycle with each other and both of them will have the CritCycle condition.
//
// - "CritDeletingCRD": The HierarchyConfiguration CRD is being deleted. No more objects will be
// propagated into or out of this namespace. It is expected that the HNC controller will be
// stopped soon after the CRDs are fully deleted.
//
// - "CritAncestor": a critical error exists in an ancestor namespace, so this namespace is no
// longer being updated either.
//
Expand Down Expand Up @@ -269,9 +275,10 @@ func init() {
SchemeBuilder.Register(&HierarchyConfiguration{}, &HierarchyConfigurationList{})
ClearConditionCriteria = map[Code]ClearConditionCriterion{
// All conditions on namespaces are set/cleared manually by the HCR
CritParentMissing: CCCManual,
CritCycle: CCCManual,
CritAncestor: CCCManual,
CritCycle: CCCManual,
CritDeletingCRD: CCCManual,
CritParentMissing: CCCManual,
SubnamespaceAnchorMissing: CCCManual,

// A source object can cause the CannotPropagate condition in two ways: if it cannot be
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,15 @@ spec:
example, if namespace B says that its parent is namespace A,
but namespace A says that its parent is namespace B, then A
and B are in a cycle with each other and both of them will have
the CritCycle condition. \n - \"CritAncestor\": a critical error
exists in an ancestor namespace, so this namespace is no longer
being updated either. \n - \"SubnamespaceAnchorMissing\": this
namespace is a subnamespace, but the anchor referenced in its
`subnamespaceOf` annotation does not exist in the parent. \n
- \"CannotPropagateObject\": this namespace contains an object
the CritCycle condition. \n - \"CritDeletingCRD\": The HierarchyConfiguration
CRD is being deleted. No more objects will be propagated into
or out of this namespace. It is expected that the HNC controller
will be stopped soon after the CRDs are fully deleted. \n -
\"CritAncestor\": a critical error exists in an ancestor namespace,
so this namespace is no longer being updated either. \n - \"SubnamespaceAnchorMissing\":
this namespace is a subnamespace, but the anchor referenced
in its `subnamespaceOf` annotation does not exist in the parent.
\n - \"CannotPropagateObject\": this namespace contains an object
that couldn't be propagated *out* of this namespace, to one
or more of this namespace's descendants. If the object couldn't
be propagated to *any* descendants - for example, because it
Expand Down
47 changes: 47 additions & 0 deletions incubator/hnc/hack/test-delete-hc-and-anchor-crd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# see https://github.com/kubernetes-sigs/multi-tenancy/issues/824

echo "THIS TEST WILL DELETE CRITICAL PARTS OF HNC. DO NOT RUN UNLESS YOU KNOW WHAT YOU'RE DOING"
echo "You have five seconds to turn back!"
sleep 5


echo "-------------------------------------------------------"
echo "Cleaning up from the last run. This may cause errors"

p=delete-hc-anchor-crd-parent
c=delete-hc-anchor-crd-child

kubectl hns set -a $p
sleep 1
kubectl delete ns $p

# Fail on any error from here on in.
set -e

echo "-------------------------------------------------------"
echo "Creating parent and child"

p=delete-hc-anchor-crd-parent
c=delete-hc-anchor-crd-child

kubectl create ns $p
kubectl hns create $c -n $p

echo "-------------------------------------------------------"
echo "Delete the HC CRD, then wait 1s, then delete the anchor CRD. HNC IS NOW IN A BAD STATE AND MUST BE REINSTALLED"

kubectl delete crd hierarchyconfigurations.hnc.x-k8s.io &
sleep 1
kubectl delete crd subnamespaceanchors.hnc.x-k8s.io &
kubectl delete crd hncconfigurations.hnc.x-k8s.io &

echo "-------------------------------------------------------"
echo "Sleeping for 10s to give HNC the chance to fully delete everything (5s wasn't enough)"

sleep 10

echo "-------------------------------------------------------"
echo "Verify that the HNC CRDs are gone (if nothing's printed, then they are)"

kubectl get crd | grep hnc
18 changes: 1 addition & 17 deletions incubator/hnc/internal/reconcilers/anchor.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -113,7 +112,7 @@ func (r *AnchorReconciler) onDeleting(ctx context.Context, log logr.Logger, inst
return false, nil
}

deletingCRD, err := r.isDeletingCRD(ctx)
deletingCRD, err := isDeletingCRD(ctx, r, api.Anchors)
if err != nil {
log.Info("Couldn't determine if CRD is being deleted")
return false, err
Expand Down Expand Up @@ -142,21 +141,6 @@ func (r *AnchorReconciler) onDeleting(ctx context.Context, log logr.Logger, inst
}
}

// isDeletingCRD returns true if the Anchor CRD is being deleted. This can happen if HNC is being
// uninstalled. In such cases, we shouldn't perform a cascading deletion.
func (r *AnchorReconciler) isDeletingCRD(ctx context.Context) (bool, error) {
crd := &apiextensions.CustomResourceDefinition{}
nsn := types.NamespacedName{Name: api.Anchors + "." + api.MetaGroup}
if err := r.Get(ctx, nsn, crd); err != nil {
// Either the CRD wasn't found, in which case, HNC can't operate propertly; hopefully, the admin
// is uninstalling HNC, and the HNC pod is also about to be shut down. Otherwise, there was some
// transient error and we should just retry.
return false, err
}

return !crd.DeletionTimestamp.IsZero(), nil
}

// shouldDeleteSubns returns true if the namespace still exists and it is a leaf
// subnamespace or it allows cascading delete unless the CRD is being deleted.
func (r *AnchorReconciler) shouldDeleteSubns(inst *api.SubnamespaceAnchor, nsInst *corev1.Namespace, deletingCRD bool) bool {
Expand Down
57 changes: 31 additions & 26 deletions incubator/hnc/internal/reconcilers/hierarchy_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package reconcilers

import (
"context"
"errors"
"fmt"
"reflect"
"strconv"
Expand All @@ -26,7 +25,6 @@ import (

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -109,10 +107,16 @@ func (r *HierarchyConfigReconciler) reconcile(ctx context.Context, log logr.Logg
origNS := nsInst.DeepCopy()

// Get singleton from apiserver. If it doesn't exist, initialize one.
inst, err := r.getSingleton(ctx, nm)
inst, deletingCRD, err := r.getSingleton(ctx, nm)
if err != nil {
return err
}
// Don't _create_ the singleton if its CRD is being deleted. But if the singleton is already
// _present_, we may need to update it to remove finalizers (see #824).
if deletingCRD && inst.CreationTimestamp.IsZero() {
log.Info("Skipping reconcile due to CRD deletion")
return nil
}
origHC := inst.DeepCopy()

// Get a list of subnamespace anchors from apiserver.
Expand All @@ -125,7 +129,7 @@ func (r *HierarchyConfigReconciler) reconcile(ctx context.Context, log logr.Logg
r.updateFinalizers(ctx, log, inst, nsInst, anms)

// Sync the Hierarchy singleton with the in-memory forest.
r.syncWithForest(log, nsInst, inst, anms)
r.syncWithForest(log, nsInst, inst, deletingCRD, anms)

// Write back if anything's changed. Early-exit if we just write back exactly what we had.
if updated, err := r.writeInstances(ctx, log, origHC, inst, origNS, nsInst); !updated || err != nil {
Expand Down Expand Up @@ -191,7 +195,7 @@ func (r *HierarchyConfigReconciler) updateFinalizers(ctx context.Context, log lo
// be able to proceed until this one is finished. While the results of the reconiliation may not be
// fully written back to the apiserver yet, each namespace is reconciled in isolation (apart from
// the in-memory forest) so this is fine.
func (r *HierarchyConfigReconciler) syncWithForest(log logr.Logger, nsInst *corev1.Namespace, inst *api.HierarchyConfiguration, anms []string) {
func (r *HierarchyConfigReconciler) syncWithForest(log logr.Logger, nsInst *corev1.Namespace, inst *api.HierarchyConfiguration, deletingCRD bool, anms []string) {
r.Forest.Lock()
defer r.Forest.Unlock()
ns := r.Forest.Get(inst.ObjectMeta.Namespace)
Expand All @@ -218,7 +222,7 @@ func (r *HierarchyConfigReconciler) syncWithForest(log logr.Logger, nsInst *core

// Sync the status
inst.Status.Children = ns.ChildNames()
r.syncConditions(log, inst, ns, hadCrit)
r.syncConditions(log, inst, ns, deletingCRD, hadCrit)

// Sync the tree labels. This should go last since it can depend on the conditions.
r.syncLabel(log, nsInst, ns)
Expand Down Expand Up @@ -396,12 +400,12 @@ func (r *HierarchyConfigReconciler) syncLabel(log logr.Logger, nsInst *corev1.Na
}
}

func (r *HierarchyConfigReconciler) syncConditions(log logr.Logger, inst *api.HierarchyConfiguration, ns *forest.Namespace, hadCrit bool) {
func (r *HierarchyConfigReconciler) syncConditions(log logr.Logger, inst *api.HierarchyConfiguration, ns *forest.Namespace, deletingCRD, hadCrit bool) {
// Hierarchy changes may mean that some object conditions are no longer relevant.
ns.ClearObsoleteConditions(log)

// Sync critical conditions after all locally-set conditions are updated.
r.syncCritConditions(log, ns, hadCrit)
r.syncCritConditions(log, ns, deletingCRD, hadCrit)

// Convert and pass in-memory conditions to HierarchyConfiguration object.
inst.Status.Conditions = ns.Conditions()
Expand All @@ -411,13 +415,17 @@ func (r *HierarchyConfigReconciler) syncConditions(log logr.Logger, inst *api.Hi

// syncCritConditions enqueues the children of a namespace if the existing critical conditions in the
// namespace are gone or critical conditions are newly found.
func (r *HierarchyConfigReconciler) syncCritConditions(log logr.Logger, ns *forest.Namespace, hadCrit bool) {
func (r *HierarchyConfigReconciler) syncCritConditions(log logr.Logger, ns *forest.Namespace, deletingCRD, hadCrit bool) {
// If we're in a cycle, determine that now
if cycle := ns.CycleNames(); cycle != nil {
msg := fmt.Sprintf("Namespace is a member of the cycle: %s", strings.Join(cycle, " <- "))
ns.SetLocalCondition(api.CritCycle, msg)
}

if deletingCRD {
ns.SetLocalCondition(api.CritDeletingCRD, "The HierarchyConfiguration CRD is being deleted; all syncing is disabled.")
}

// Early exit if there's no need to enqueue relatives.
if hadCrit == ns.HasLocalCritCondition() {
return
Expand All @@ -427,7 +435,7 @@ func (r *HierarchyConfigReconciler) syncCritConditions(log logr.Logger, ns *fore
if hadCrit == true {
msg = "removed"
}
log.Info("Critical conditions are " + msg)
log.Info("Critical conditions are "+msg, "conditions", ns.Conditions())
r.enqueueAffected(log, "descendant of a namespace with critical conditions "+msg, ns.DescendantNames()...)
}

Expand Down Expand Up @@ -552,13 +560,14 @@ func (r *HierarchyConfigReconciler) updateObjects(ctx context.Context, log logr.
return nil
}

// getSingleton returns the singleton if it exists, or creates an empty one if it doesn't.
func (r *HierarchyConfigReconciler) getSingleton(ctx context.Context, nm string) (*api.HierarchyConfiguration, error) {
// getSingleton returns the singleton if it exists, or creates an empty one if
// it doesn't. The second parameter is true if the CRD itself is being deleted.
func (r *HierarchyConfigReconciler) getSingleton(ctx context.Context, nm string) (*api.HierarchyConfiguration, bool, error) {
nnm := types.NamespacedName{Namespace: nm, Name: api.Singleton}
inst := &api.HierarchyConfiguration{}
if err := r.Get(ctx, nnm, inst); err != nil {
if !apierrors.IsNotFound(err) {
return nil, err
return nil, false, err
}

// It doesn't exist - initialize it to a sane initial value.
Expand All @@ -567,23 +576,19 @@ func (r *HierarchyConfigReconciler) getSingleton(ctx context.Context, nm string)
}

// If the HC is either being deleted, or it doesn't exist, this may be because HNC is being
// uninstalled and the HierarchyConfiguration CRD is being/has been deleted. If so, simply start
// returning errors - do _not_ attempt to sync the rest of the forest. This will ensure that the
// object reconciler won't start deleting objects, both because existing namespaces won't suddenly
// look like roots, or if HNC is restarted, because the namespace will never be set as Exists()
// and therefore will be left alone.
// uninstalled and the HierarchyConfiguration CRD is being/has been deleted. If so, we'll need to
// put a critical condition on this singleton so that we stop making any changes to its objects,
// but we can't just stop syncing it because we may need to delete its finalizers (see #824).
deletingCRD := false
if inst.CreationTimestamp.IsZero() || !inst.DeletionTimestamp.IsZero() {
crd := &apiextensions.CustomResourceDefinition{}
nsn := types.NamespacedName{Name: api.HierarchyConfigurations + "." + api.MetaGroup}
if err := r.Get(ctx, nsn, crd); err != nil {
return nil, err
}
if !crd.DeletionTimestamp.IsZero() {
return nil, errors.New("HierarchyConfiguration CRD is being deleted, cannot load")
var err error
deletingCRD, err = isDeletingCRD(ctx, r, api.HierarchyConfigurations)
if err != nil {
return nil, false, err
}
}

return inst, nil
return inst, deletingCRD, nil
}

// getNamespace returns the namespace if it exists, or returns an invalid, blank, unnamed one if it
Expand Down
5 changes: 5 additions & 0 deletions incubator/hnc/internal/reconcilers/hnc_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ func (r *ConfigReconciler) isRoleBinding(t api.TypeSynchronizationSpec) bool {
// reconciler is called very infrequently and is not performance critical.
func (r *ConfigReconciler) writeSingleton(ctx context.Context, inst *api.HNCConfiguration) error {
if inst.CreationTimestamp.IsZero() {
// No point creating it if the CRD's being deleted
if isDeleted, err := isDeletingCRD(ctx, r, api.HNCConfigSingletons); isDeleted || err != nil {
r.Log.Info("CRD is being deleted (or CRD deletion status couldn't be determined); skip update")
return err
}
r.Log.Info("Creating a default singleton on apiserver")
if err := r.Create(ctx, inst); err != nil {
r.Log.Error(err, "while creating on apiserver")
Expand Down
25 changes: 25 additions & 0 deletions incubator/hnc/internal/reconcilers/setup.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
package reconcilers

import (
"context"
"fmt"

apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/event"

api "sigs.k8s.io/multi-tenancy/incubator/hnc/api/v1alpha1"
"sigs.k8s.io/multi-tenancy/incubator/hnc/internal/forest"
)

Expand Down Expand Up @@ -54,3 +60,22 @@ func Create(mgr ctrl.Manager, f *forest.Forest, maxReconciles int) error {

return nil
}

// crdClient is any client that can be used in isDeletingCRD (i.e. any reconciler).
type crdClient interface {
Get(context.Context, types.NamespacedName, runtime.Object) error
}

// isDeletingCRD returns true if the specified HNC CRD is being or has been deleted. The argument
// expected is the CRD name minus the HNC suffix, e.g. "hierarchyconfigurations".
func isDeletingCRD(ctx context.Context, c crdClient, nm string) (bool, error) {
crd := &apiextensions.CustomResourceDefinition{}
nsn := types.NamespacedName{Name: nm + "." + api.MetaGroup}
if err := c.Get(ctx, nsn, crd); err != nil {
if apierrors.IsNotFound(err) {
return true, nil
}
return false, err
}
return !crd.DeletionTimestamp.IsZero(), nil
}

0 comments on commit 7a4d3cb

Please sign in to comment.