diff --git a/hypershift-operator/controllers/nodepool/manifests.go b/hypershift-operator/controllers/nodepool/manifests.go
index 2842290835a..994b099171a 100644
--- a/hypershift-operator/controllers/nodepool/manifests.go
+++ b/hypershift-operator/controllers/nodepool/manifests.go
@@ -42,20 +42,23 @@ func machineHealthCheck(nodePool *hyperv1.NodePool, controlPlaneNamespace string
 	}
 }
 
+const ignitionUserDataPrefix = "user-data"
+
 func IgnitionUserDataSecret(namespace, name, payloadInputHash string) *corev1.Secret {
-	return &corev1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: namespace,
-			Name:      fmt.Sprintf("user-data-%s-%s", name, payloadInputHash),
-		},
-	}
+	return namedSecret(namespace, fmt.Sprintf("%s-%s-%s", ignitionUserDataPrefix, name, payloadInputHash))
 }
 
+const tokenSecretPrefix = "token"
+
 func TokenSecret(namespace, name, payloadInputHash string) *corev1.Secret {
+	return namedSecret(namespace, fmt.Sprintf("%s-%s-%s", tokenSecretPrefix, name, payloadInputHash))
+}
+
+func namedSecret(namespace, name string) *corev1.Secret {
 	return &corev1.Secret{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: namespace,
-			Name:      fmt.Sprintf("token-%s-%s", name, payloadInputHash),
+			Name:      name,
 		},
 	}
 }
diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go
index 17ecefcb7c1..b6f15a01011 100644
--- a/hypershift-operator/controllers/nodepool/nodepool_controller.go
+++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go
@@ -24,7 +24,6 @@ import (
 	agentv1 "github.com/openshift/cluster-api-provider-agent/api/v1beta1"
 	performanceprofilev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
 	tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
-	hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
 
 	"github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster"
 	"github.com/openshift/hypershift/hypershift-operator/controllers/manifests"
@@ -116,7 +115,6 @@ type NodePoolReconciler struct {
 	client.Client
 	recorder        record.EventRecorder
 	ReleaseProvider releaseinfo.Provider
-	controller      controller.Controller
 	upsert.CreateOrUpdateProvider
 	HypershiftOperatorImage string
 	ImageMetadataProvider   supportutil.ImageMetadataProvider
@@ -140,7 +138,7 @@ var (
 )
 
 func (r *NodePoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
-	controller, err := ctrl.NewControllerManagedBy(mgr).
+	if err := ctrl.NewControllerManagedBy(mgr).
 		For(&hyperv1.NodePool{}).
 		// We want to reconcile when the HostedCluster IgnitionEndpoint is available.
 		Watches(&hyperv1.HostedCluster{}, handler.EnqueueRequestsFromMapFunc(r.enqueueNodePoolsForHostedCluster)).
@@ -157,12 +155,22 @@ func (r *NodePoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
 			RateLimiter:             workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 10*time.Second),
 			MaxConcurrentReconciles: 10,
 		}).
-		Build(r)
-	if err != nil {
+		Complete(r); err != nil {
+		return errors.Wrap(err, "failed setting up with a controller manager")
+	}
+
+	if err := ctrl.NewControllerManagedBy(mgr).
+		For(&corev1.Secret{}).
+		WithOptions(controller.Options{
+			RateLimiter:             workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 10*time.Second),
+			MaxConcurrentReconciles: 10,
+		}).
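+		// The secretJanitor watches Secrets and cleans up token and user-data Secrets that no longer match a live NodePool.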
+		Complete(&secretJanitor{
+			NodePoolReconciler: r,
+		}); err != nil {
 		return errors.Wrap(err, "failed setting up with a controller manager")
 	}
 
-	r.controller = controller
 	r.recorder = mgr.GetEventRecorderFor("nodepool-controller")
 
 	return nil
@@ -266,26 +274,10 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 		return ctrl.Result{}, nil
 	}
 
-	// 1. - Reconcile conditions according to current state of the world.
-	proxy := globalconfig.ProxyConfig()
-	globalconfig.ReconcileProxyConfigWithStatusFromHostedCluster(proxy, hcluster)
-
-	// NOTE: The image global config is not injected via userdata or NodePool ignition config.
-	// It is included directly by the ignition server. However, we need to detect the change
-	// here to trigger a nodepool update.
-	image := globalconfig.ImageConfig()
-	globalconfig.ReconcileImageConfigFromHostedCluster(image, hcluster)
-
-	// Serialize proxy and image into a single string to use in the token secret hash.
-	globalConfigBytes := bytes.NewBuffer(nil)
-	enc := json.NewEncoder(globalConfigBytes)
-	if err := enc.Encode(proxy); err != nil {
-		return ctrl.Result{}, fmt.Errorf("failed to encode proxy global config: %w", err)
-	}
-	if err := enc.Encode(image); err != nil {
-		return ctrl.Result{}, fmt.Errorf("failed to encode image global config: %w", err)
+	globalConfig, err := globalConfigString(hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
 	}
-	globalConfig := globalConfigBytes.String()
 
 	if isAutoscalingEnabled(nodePool) {
 		SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
@@ -561,11 +553,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 	// 3 generic core config resources: fips, ssh and haproxy.
 	// TODO (alberto): consider moving the expectedCoreConfigResources check
 	// into the token Secret controller so we don't block Machine infra creation on this.
-	expectedCoreConfigResources := 3
-	if len(hcluster.Spec.ImageContentSources) > 0 {
-		// additional core config resource created when image content source specified.
-		expectedCoreConfigResources += 1
-	}
+	expectedCoreConfigResources := expectedCoreConfigResourcesForHostedCluster(hcluster)
 	config, missingConfigs, err := r.getConfig(ctx, nodePool, expectedCoreConfigResources, controlPlaneNamespace, releaseImage, hcluster)
 	if err != nil {
 		SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
@@ -642,7 +630,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 	}
 
 	// Signal ignition payload generation
-	targetPayloadConfigHash := supportutil.HashSimple(config + targetVersion + pullSecretName + globalConfig)
+	targetPayloadConfigHash := payloadConfigHash(config, targetVersion, pullSecretName, globalConfig)
 	tokenSecret := TokenSecret(controlPlaneNamespace, nodePool.Name, targetPayloadConfigHash)
 	condition, err := r.createValidGeneratedPayloadCondition(ctx, tokenSecret, nodePool.Generation)
 	if err != nil {
@@ -817,7 +805,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 
 	userDataSecret := IgnitionUserDataSecret(controlPlaneNamespace, nodePool.GetName(), targetPayloadConfigHash)
 	if result, err := r.CreateOrUpdate(ctx, r.Client, userDataSecret, func() error {
-		return reconcileUserDataSecret(userDataSecret, nodePool, caCertBytes, tokenBytes, ignEndpoint, targetPayloadConfigHash, proxy)
+		return reconcileUserDataSecret(userDataSecret, nodePool, caCertBytes, tokenBytes, ignEndpoint, targetPayloadConfigHash, globalconfig.ProxyConfig())
 	}); err != nil {
 		return ctrl.Result{}, err
 	} else {
@@ -970,6 +958,15 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
 	return ctrl.Result{}, nil
 }
 
+func expectedCoreConfigResourcesForHostedCluster(hcluster *hyperv1.HostedCluster) int {
+	expectedCoreConfigResources := 3
+	if len(hcluster.Spec.ImageContentSources) > 0 {
+		// additional core config resource created when image content source specified.
+		expectedCoreConfigResources += 1
+	}
+	return expectedCoreConfigResources
+}
+
 // setMachineAndNodeConditions sets the nodePool's AllMachinesReady and AllNodesHealthy conditions.
 func (r *NodePoolReconciler) setMachineAndNodeConditions(ctx context.Context, nodePool *hyperv1.NodePool, hc *hyperv1.HostedCluster) error {
 	// Get all Machines for NodePool.
@@ -2848,8 +2845,12 @@ func (r *NodePoolReconciler) getPullSecretBytes(ctx context.Context, hostedClust
 
 // getPullSecretName retrieves the name of the pull secret in the hosted cluster spec
 func (r *NodePoolReconciler) getPullSecretName(ctx context.Context, hostedCluster *hyperv1.HostedCluster) (string, error) {
+	return getPullSecretName(ctx, r.Client, hostedCluster)
+}
+
+func getPullSecretName(ctx context.Context, crclient client.Client, hostedCluster *hyperv1.HostedCluster) (string, error) {
 	pullSecret := &corev1.Secret{}
-	if err := r.Client.Get(ctx, client.ObjectKey{Namespace: hostedCluster.Namespace, Name: hostedCluster.Spec.PullSecret.Name}, pullSecret); err != nil {
+	if err := crclient.Get(ctx, client.ObjectKey{Namespace: hostedCluster.Namespace, Name: hostedCluster.Spec.PullSecret.Name}, pullSecret); err != nil {
 		return "", fmt.Errorf("cannot get pull secret %s/%s: %w", hostedCluster.Namespace, hostedCluster.Spec.PullSecret.Name, err)
 	}
 	if _, hasKey := pullSecret.Data[corev1.DockerConfigJsonKey]; !hasKey {
@@ -2917,3 +2918,153 @@ func aggregateMachineMessages(msgs []string) string {
 
 	return builder.String()
 }
+
+// secretJanitor reconciles secrets and determines which secrets should remain in the cluster and which should be cleaned up.
+// Any secret annotated with a nodePool name should only be on the cluster if the nodePool continues to exist
+// and if our current calculation for the inputs to the name matches what the secret is named.
+type secretJanitor struct {
+	*NodePoolReconciler
+}
+
+func (r *secretJanitor) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+
+	secret := &corev1.Secret{}
+	if err := r.Client.Get(ctx, req.NamespacedName, secret); err != nil {
+		if apierrors.IsNotFound(err) {
+			log.Info("not found", "request", req.String())
+			return ctrl.Result{}, nil
+		}
+		log.Error(err, "error getting secret")
+		return ctrl.Result{}, err
+	}
+
+	// only handle secrets that are associated with a NodePool
+	nodePoolName, annotated := secret.Annotations[nodePoolAnnotation]
+	if !annotated {
+		return ctrl.Result{}, nil
+	}
+	log = log.WithValues("nodePool", nodePoolName)
+
+	// only handle secret types that we know about explicitly
+	shouldHandle := false
+	for _, prefix := range []string{tokenSecretPrefix, ignitionUserDataPrefix} {
+		if strings.HasPrefix(secret.Name, prefix) {
+			shouldHandle = true
+			break
+		}
+	}
+	if !shouldHandle {
+		return ctrl.Result{}, nil
+	}
+
+	nodePool := &hyperv1.NodePool{}
+	if err := r.Client.Get(ctx, supportutil.ParseNamespacedName(nodePoolName), nodePool); err != nil && !apierrors.IsNotFound(err) {
+		log.Error(err, "error getting nodepool")
+		return ctrl.Result{}, err
+	} else if apierrors.IsNotFound(err) {
+		log.Info("removing secret as nodePool is missing")
+		return ctrl.Result{}, r.Client.Delete(ctx, secret)
+	}
+
+	hcluster, err := GetHostedClusterByName(ctx, r.Client, nodePool.GetNamespace(), nodePool.Spec.ClusterName)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	controlPlaneNamespace := manifests.HostedControlPlaneNamespace(hcluster.Namespace, hcluster.Name)
+	expectedCoreConfigResources := expectedCoreConfigResourcesForHostedCluster(hcluster)
+	releaseImage, err := r.getReleaseImage(ctx, hcluster, nodePool.Status.Version, nodePool.Spec.Release.Image)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	config, missingConfigs, err := r.getConfig(ctx, nodePool, expectedCoreConfigResources, controlPlaneNamespace, releaseImage, hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	if missingConfigs {
+		return ctrl.Result{}, nil
+	}
+	targetVersion := releaseImage.Version()
+
+	pullSecretName, err := r.getPullSecretName(ctx, hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	globalConfig, err := globalConfigString(hcluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	targetPayloadConfigHash := payloadConfigHash(config, targetVersion, pullSecretName, globalConfig)
+
+	// synchronously deleting the ignition token is unsafe; we need to clean up tokens by annotating them to expire
+	synchronousCleanup := func(ctx context.Context, c client.Client, secret *corev1.Secret) error {
+		return c.Delete(ctx, secret)
+	}
+	type nodePoolSecret struct {
+		expectedName   string
+		matchingPrefix string
+		cleanup        func(context.Context, client.Client, *corev1.Secret) error
+	}
+	valid := false
+	options := []nodePoolSecret{
+		{
+			expectedName:   TokenSecret(controlPlaneNamespace, nodePool.Name, targetPayloadConfigHash).Name,
+			matchingPrefix: tokenSecretPrefix,
+			cleanup:        setExpirationTimestampOnToken,
+		},
+		{
+			expectedName:   IgnitionUserDataSecret(controlPlaneNamespace, nodePool.GetName(), targetPayloadConfigHash).Name,
+			matchingPrefix: ignitionUserDataPrefix,
+			cleanup:        synchronousCleanup,
+		},
+	}
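+	// Determine whether this Secret matches one of the expected names; if not, pick the cleanup routine that matches its prefix.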
+	cleanup := synchronousCleanup
+	var names []string
+	for _, option := range options {
+		names = append(names, option.expectedName)
+		if secret.Name == option.expectedName {
+			valid = true
+		}
+		if strings.HasPrefix(secret.Name, option.matchingPrefix) {
+			cleanup = option.cleanup
+		}
+	}
+
+	if valid {
+		return ctrl.Result{}, nil
+	}
+
+	log.WithValues("options", names, "valid", valid).Info("removing secret as it does not match the expected set of names")
+	return ctrl.Result{}, cleanup(ctx, r.Client, secret)
+}
+
+func globalConfigString(hcluster *hyperv1.HostedCluster) (string, error) {
+	// 1. - Reconcile conditions according to current state of the world.
+	proxy := globalconfig.ProxyConfig()
+	globalconfig.ReconcileProxyConfigWithStatusFromHostedCluster(proxy, hcluster)
+
+	// NOTE: The image global config is not injected via userdata or NodePool ignition config.
+	// It is included directly by the ignition server. However, we need to detect the change
+	// here to trigger a nodepool update.
+	image := globalconfig.ImageConfig()
+	globalconfig.ReconcileImageConfigFromHostedCluster(image, hcluster)
+
+	// Serialize proxy and image into a single string to use in the token secret hash.
+	globalConfigBytes := bytes.NewBuffer(nil)
+	enc := json.NewEncoder(globalConfigBytes)
+	if err := enc.Encode(proxy); err != nil {
+		return "", fmt.Errorf("failed to encode proxy global config: %w", err)
+	}
+	if err := enc.Encode(image); err != nil {
+		return "", fmt.Errorf("failed to encode image global config: %w", err)
+	}
+	return globalConfigBytes.String(), nil
+}
+
+func payloadConfigHash(config, targetVersion, pullSecretName, globalConfig string) string {
+	return supportutil.HashSimple(config + targetVersion + pullSecretName + globalConfig)
+}