nodepool_controller: add a reconciler for cleanup
When we change the config for a NodePool, or the way in which we hash its
values, we leak token and user-data Secrets. However, since these Secrets
are annotated with the NodePool they were created for, it is simple to
check whether a Secret still matches what we expect and to clean it up
otherwise.

Signed-off-by: Steve Kuznetsov <[email protected]>
stevekuznetsov committed May 6, 2024
1 parent a540b01 commit 986495f
Showing 2 changed files with 194 additions and 40 deletions.
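In short, the janitor added below recomputes the Secret names that the NodePool controller would generate today and cleans up any NodePool-annotated Secret whose name no longer matches. A minimal sketch of that check, assuming the prefixes introduced in manifests.go; the helper name, its signature, and the spelled-out annotation key are illustrative rather than taken from the commit:

package cleanup

import (
	"strings"

	corev1 "k8s.io/api/core/v1"
)

const (
	tokenSecretPrefix      = "token"
	ignitionUserDataPrefix = "user-data"
	nodePoolAnnotation     = "hypershift.openshift.io/nodePool" // assumed value of the controller's annotation key
)

// isStale reports whether a NodePool-annotated Secret with a known prefix no
// longer matches any of the names the controller would generate today.
func isStale(secret *corev1.Secret, expectedNames []string) bool {
	if _, annotated := secret.Annotations[nodePoolAnnotation]; !annotated {
		return false // not managed per-NodePool; leave it alone
	}
	known := strings.HasPrefix(secret.Name, tokenSecretPrefix) || strings.HasPrefix(secret.Name, ignitionUserDataPrefix)
	if !known {
		return false // not a token or user-data Secret
	}
	for _, name := range expectedNames {
		if secret.Name == name {
			return false // matches the current expectation; keep it
		}
	}
	return true
}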
17 changes: 10 additions & 7 deletions hypershift-operator/controllers/nodepool/manifests.go
@@ -42,20 +42,23 @@ func machineHealthCheck(nodePool *hyperv1.NodePool, controlPlaneNamespace string
 	}
 }
 
+const ignitionUserDataPrefix = "user-data"
+
 func IgnitionUserDataSecret(namespace, name, payloadInputHash string) *corev1.Secret {
-	return &corev1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: namespace,
-			Name:      fmt.Sprintf("user-data-%s-%s", name, payloadInputHash),
-		},
-	}
+	return namedSecret(namespace, fmt.Sprintf("%s-%s-%s", ignitionUserDataPrefix, name, payloadInputHash))
 }
 
+const tokenSecretPrefix = "token"
+
 func TokenSecret(namespace, name, payloadInputHash string) *corev1.Secret {
+	return namedSecret(namespace, fmt.Sprintf("%s-%s-%s", tokenSecretPrefix, name, payloadInputHash))
+}
+
+func namedSecret(namespace, name string) *corev1.Secret {
 	return &corev1.Secret{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: namespace,
-			Name:      fmt.Sprintf("token-%s-%s", name, payloadInputHash),
+			Name:      name,
 		},
 	}
 }
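With the prefixes factored out into constants and the shared namedSecret helper, the controller and the janitor below derive identical Secret names. A quick usage sketch, assuming it sits in the same package as the helpers above (namespace, NodePool name, and hash are made up):

// Illustrative values only; the hash normally comes from payloadConfigHash.
func exampleSecretNames() (string, string) {
	token := TokenSecret("clusters-example-abcde", "example-pool", "abcd1234")
	userData := IgnitionUserDataSecret("clusters-example-abcde", "example-pool", "abcd1234")
	return token.Name, userData.Name // "token-example-pool-abcd1234", "user-data-example-pool-abcd1234"
}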
217 changes: 184 additions & 33 deletions hypershift-operator/controllers/nodepool/nodepool_controller.go
@@ -24,7 +24,6 @@ import (
 	agentv1 "github.com/openshift/cluster-api-provider-agent/api/v1beta1"
 	performanceprofilev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
 	tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
-
 	hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
 	"github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster"
 	"github.com/openshift/hypershift/hypershift-operator/controllers/manifests"
@@ -116,7 +115,6 @@ type NodePoolReconciler struct {
 	client.Client
 	recorder        record.EventRecorder
 	ReleaseProvider releaseinfo.Provider
-	controller      controller.Controller
 	upsert.CreateOrUpdateProvider
 	HypershiftOperatorImage string
 	ImageMetadataProvider   supportutil.ImageMetadataProvider
@@ -140,7 +138,7 @@ var (
 )
 
 func (r *NodePoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
-	controller, err := ctrl.NewControllerManagedBy(mgr).
+	if err := ctrl.NewControllerManagedBy(mgr).
 		For(&hyperv1.NodePool{}).
 		// We want to reconcile when the HostedCluster IgnitionEndpoint is available.
 		Watches(&hyperv1.HostedCluster{}, handler.EnqueueRequestsFromMapFunc(r.enqueueNodePoolsForHostedCluster)).
@@ -157,12 +155,22 @@ func (r *NodePoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
 			RateLimiter:             workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 10*time.Second),
 			MaxConcurrentReconciles: 10,
 		}).
-		Build(r)
-	if err != nil {
+		Complete(r); err != nil {
 		return errors.Wrap(err, "failed setting up with a controller manager")
 	}
 
+	if err := ctrl.NewControllerManagedBy(mgr).
+		For(&corev1.Secret{}).
+		WithOptions(controller.Options{
+			RateLimiter:             workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 10*time.Second),
+			MaxConcurrentReconciles: 10,
+		}).
+		Complete(&secretJanitor{
+			NodePoolReconciler: r,
+		}); err != nil {
+		return errors.Wrap(err, "failed setting up with a controller manager")
+	}
+
-	r.controller = controller
 	r.recorder = mgr.GetEventRecorderFor("nodepool-controller")
 
 	return nil
@@ -266,26 +274,10 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 		return ctrl.Result{}, nil
 	}
 
-	// 1. - Reconcile conditions according to current state of the world.
-	proxy := globalconfig.ProxyConfig()
-	globalconfig.ReconcileProxyConfigWithStatusFromHostedCluster(proxy, hcluster)
-
-	// NOTE: The image global config is not injected via userdata or NodePool ignition config.
-	// It is included directly by the ignition server. However, we need to detect the change
-	// here to trigger a nodepool update.
-	image := globalconfig.ImageConfig()
-	globalconfig.ReconcileImageConfigFromHostedCluster(image, hcluster)
-
-	// Serialize proxy and image into a single string to use in the token secret hash.
-	globalConfigBytes := bytes.NewBuffer(nil)
-	enc := json.NewEncoder(globalConfigBytes)
-	if err := enc.Encode(proxy); err != nil {
-		return ctrl.Result{}, fmt.Errorf("failed to encode proxy global config: %w", err)
-	}
-	if err := enc.Encode(image); err != nil {
-		return ctrl.Result{}, fmt.Errorf("failed to encode image global config: %w", err)
+	globalConfig, err := globalConfigString(hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
 	}
-	globalConfig := globalConfigBytes.String()
 
 	if isAutoscalingEnabled(nodePool) {
 		SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
@@ -561,11 +553,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 	// 3 generic core config resources: fips, ssh and haproxy.
 	// TODO (alberto): consider moving the expectedCoreConfigResources check
 	// into the token Secret controller so we don't block Machine infra creation on this.
-	expectedCoreConfigResources := 3
-	if len(hcluster.Spec.ImageContentSources) > 0 {
-		// additional core config resource created when image content source specified.
-		expectedCoreConfigResources += 1
-	}
+	expectedCoreConfigResources := expectedCoreConfigResourcesForHostedCluster(hcluster)
 	config, missingConfigs, err := r.getConfig(ctx, nodePool, expectedCoreConfigResources, controlPlaneNamespace, releaseImage, hcluster)
 	if err != nil {
 		SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
@@ -642,7 +630,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 	}
 
 	// Signal ignition payload generation
-	targetPayloadConfigHash := supportutil.HashSimple(config + targetVersion + pullSecretName + globalConfig)
+	targetPayloadConfigHash := payloadConfigHash(config, targetVersion, pullSecretName, globalConfig)
 	tokenSecret := TokenSecret(controlPlaneNamespace, nodePool.Name, targetPayloadConfigHash)
 	condition, err := r.createValidGeneratedPayloadCondition(ctx, tokenSecret, nodePool.Generation)
 	if err != nil {
@@ -817,7 +805,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 
 	userDataSecret := IgnitionUserDataSecret(controlPlaneNamespace, nodePool.GetName(), targetPayloadConfigHash)
 	if result, err := r.CreateOrUpdate(ctx, r.Client, userDataSecret, func() error {
-		return reconcileUserDataSecret(userDataSecret, nodePool, caCertBytes, tokenBytes, ignEndpoint, targetPayloadConfigHash, proxy)
+		return reconcileUserDataSecret(userDataSecret, nodePool, caCertBytes, tokenBytes, ignEndpoint, targetPayloadConfigHash, globalconfig.ProxyConfig())
 	}); err != nil {
 		return ctrl.Result{}, err
 	} else {
@@ -970,6 +958,15 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 	return ctrl.Result{}, nil
 }
 
+func expectedCoreConfigResourcesForHostedCluster(hcluster *hyperv1.HostedCluster) int {
+	expectedCoreConfigResources := 3
+	if len(hcluster.Spec.ImageContentSources) > 0 {
+		// additional core config resource created when image content source specified.
+		expectedCoreConfigResources += 1
+	}
+	return expectedCoreConfigResources
+}
+
 // setMachineAndNodeConditions sets the nodePool's AllMachinesReady and AllNodesHealthy conditions.
 func (r *NodePoolReconciler) setMachineAndNodeConditions(ctx context.Context, nodePool *hyperv1.NodePool, hc *hyperv1.HostedCluster) error {
 	// Get all Machines for NodePool.
@@ -2848,8 +2845,12 @@ func (r *NodePoolReconciler) getPullSecretBytes(ctx context.Context, hostedClust
 
 // getPullSecretName retrieves the name of the pull secret in the hosted cluster spec
 func (r *NodePoolReconciler) getPullSecretName(ctx context.Context, hostedCluster *hyperv1.HostedCluster) (string, error) {
+	return getPullSecretName(ctx, r.Client, hostedCluster)
+}
+
+func getPullSecretName(ctx context.Context, crclient client.Client, hostedCluster *hyperv1.HostedCluster) (string, error) {
 	pullSecret := &corev1.Secret{}
-	if err := r.Client.Get(ctx, client.ObjectKey{Namespace: hostedCluster.Namespace, Name: hostedCluster.Spec.PullSecret.Name}, pullSecret); err != nil {
+	if err := crclient.Get(ctx, client.ObjectKey{Namespace: hostedCluster.Namespace, Name: hostedCluster.Spec.PullSecret.Name}, pullSecret); err != nil {
 		return "", fmt.Errorf("cannot get pull secret %s/%s: %w", hostedCluster.Namespace, hostedCluster.Spec.PullSecret.Name, err)
 	}
 	if _, hasKey := pullSecret.Data[corev1.DockerConfigJsonKey]; !hasKey {
@@ -2917,3 +2918,153 @@ func aggregateMachineMessages(msgs []string) string {
 
 	return builder.String()
 }
+
+// secretJanitor reconciles secrets and determines which secrets should remain in the cluster and which should be cleaned up.
+// Any secret annotated with a nodePool name should only be on the cluster if the nodePool continues to exist
+// and if our current calculation for the inputs to the name matches what the secret is named.
+type secretJanitor struct {
+	*NodePoolReconciler
+}
+
+func (r *secretJanitor) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+
+	secret := &corev1.Secret{}
+	if err := r.Client.Get(ctx, req.NamespacedName, secret); err != nil {
+		if apierrors.IsNotFound(err) {
+			log.Info("not found", "request", req.String())
+			return ctrl.Result{}, nil
+		}
+		log.Error(err, "error getting secret")
+		return ctrl.Result{}, err
+	}
+
+	// only handle secrets that are associated with a NodePool
+	nodePoolName, annotated := secret.Annotations[nodePoolAnnotation]
+	if !annotated {
+		return ctrl.Result{}, nil
+	}
+	log = log.WithValues("nodePool", nodePoolName)
+
+	// only handle secret types that we know about explicitly
+	shouldHandle := false
+	for _, prefix := range []string{tokenSecretPrefix, ignitionUserDataPrefix} {
+		if strings.HasPrefix(secret.Name, prefix) {
+			shouldHandle = true
+			break
+		}
+	}
+	if !shouldHandle {
+		return ctrl.Result{}, nil
+	}
+
+	nodePool := &hyperv1.NodePool{}
+	if err := r.Client.Get(ctx, supportutil.ParseNamespacedName(nodePoolName), nodePool); err != nil && !apierrors.IsNotFound(err) {
+		log.Error(err, "error getting nodepool")
+		return ctrl.Result{}, err
+	} else if apierrors.IsNotFound(err) {
+		log.Info("removing secret as nodePool is missing")
+		return ctrl.Result{}, r.Client.Delete(ctx, secret)
+	}
+
+	hcluster, err := GetHostedClusterByName(ctx, r.Client, nodePool.GetNamespace(), nodePool.Spec.ClusterName)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	controlPlaneNamespace := manifests.HostedControlPlaneNamespace(hcluster.Namespace, hcluster.Name)
+	expectedCoreConfigResources := expectedCoreConfigResourcesForHostedCluster(hcluster)
+	releaseImage, err := r.getReleaseImage(ctx, hcluster, nodePool.Status.Version, nodePool.Spec.Release.Image)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	config, missingConfigs, err := r.getConfig(ctx, nodePool, expectedCoreConfigResources, controlPlaneNamespace, releaseImage, hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	if missingConfigs {
+		return ctrl.Result{}, nil
+	}
+	targetVersion := releaseImage.Version()
+
+	pullSecretName, err := r.getPullSecretName(ctx, hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	globalConfig, err := globalConfigString(hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	targetPayloadConfigHash := payloadConfigHash(config, targetVersion, pullSecretName, globalConfig)
+
+	// synchronously deleting the ignition token is unsafe; we need to clean up tokens by annotating them to expire
+	synchronousCleanup := func(ctx context.Context, c client.Client, secret *corev1.Secret) error {
+		return c.Delete(ctx, secret)
+	}
+	type nodePoolSecret struct {
+		expectedName   string
+		matchingPrefix string
+		cleanup        func(context.Context, client.Client, *corev1.Secret) error
+	}
+	valid := false
+	options := []nodePoolSecret{
+		{
+			expectedName:   TokenSecret(controlPlaneNamespace, nodePool.Name, targetPayloadConfigHash).Name,
+			matchingPrefix: tokenSecretPrefix,
+			cleanup:        setExpirationTimestampOnToken,
+		},
+		{
+			expectedName:   IgnitionUserDataSecret(controlPlaneNamespace, nodePool.GetName(), targetPayloadConfigHash).Name,
+			matchingPrefix: ignitionUserDataPrefix,
+			cleanup:        synchronousCleanup,
+		},
+	}
+	cleanup := synchronousCleanup
+	var names []string
+	for _, option := range options {
+		names = append(names, option.expectedName)
+		if secret.Name == option.expectedName {
+			valid = true
+		}
+		if strings.HasPrefix(secret.Name, option.matchingPrefix) {
+			cleanup = option.cleanup
+		}
+	}
+
+	if valid {
+		return ctrl.Result{}, nil
+	}
+
+	log.WithValues("options", names, "valid", valid).Info("removing secret as it does not match the expected set of names")
+	return ctrl.Result{}, cleanup(ctx, r.Client, secret)
+}
+
+func globalConfigString(hcluster *hyperv1.HostedCluster) (string, error) {
+	// 1. - Reconcile conditions according to current state of the world.
+	proxy := globalconfig.ProxyConfig()
+	globalconfig.ReconcileProxyConfigWithStatusFromHostedCluster(proxy, hcluster)
+
+	// NOTE: The image global config is not injected via userdata or NodePool ignition config.
+	// It is included directly by the ignition server. However, we need to detect the change
+	// here to trigger a nodepool update.
+	image := globalconfig.ImageConfig()
+	globalconfig.ReconcileImageConfigFromHostedCluster(image, hcluster)
+
+	// Serialize proxy and image into a single string to use in the token secret hash.
+	globalConfigBytes := bytes.NewBuffer(nil)
+	enc := json.NewEncoder(globalConfigBytes)
+	if err := enc.Encode(proxy); err != nil {
+		return "", fmt.Errorf("failed to encode proxy global config: %w", err)
+	}
+	if err := enc.Encode(image); err != nil {
+		return "", fmt.Errorf("failed to encode image global config: %w", err)
+	}
+	return globalConfigBytes.String(), nil
+}
+
+func payloadConfigHash(config, targetVersion, pullSecretName, globalConfig string) string {
+	return supportutil.HashSimple(config + targetVersion + pullSecretName + globalConfig)
+}

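One design note on the cleanup strategies above: stale user-data Secrets are deleted immediately, while stale token Secrets are handed to setExpirationTimestampOnToken instead, since the diff notes that deleting them synchronously is unsafe. That helper is not part of this diff; a rough sketch of the idea, in which the annotation key and TTL are assumptions rather than HyperShift's actual values:

package cleanup

import (
	"context"
	"time"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// markTokenForExpiry annotates a token Secret with an expiration timestamp
// instead of deleting it, leaving retirement to the token-secret controller.
// The annotation key and TTL are illustrative, not HyperShift's real values.
func markTokenForExpiry(ctx context.Context, c client.Client, secret *corev1.Secret, ttl time.Duration) error {
	if secret.Annotations == nil {
		secret.Annotations = map[string]string{}
	}
	secret.Annotations["example.io/token-expiration-timestamp"] = time.Now().Add(ttl).Format(time.RFC3339)
	return c.Update(ctx, secret)
}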