From ca1019e30c4e98730b041faff54c84a44824db42 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Fri, 21 Oct 2022 08:54:51 -0400 Subject: [PATCH] Retrieve CAPI/CAPA from release image Retrieves the CAPI and CAPA image components from the release image rather than registry.ci.openshift.org/hypershift/cluster-api:v1.0.0 and registry.ci.openshift.org/hypershift/cluster-api-aws-controller:v1.1.0 respectively --- .../hostedcluster/hostedcluster_controller.go | 45 ++-------- .../internal/platform/aws/aws.go | 12 +-- .../nodepool/nodepool_controller.go | 83 ++++++++++++++++--- support/util/imagemetadata.go | 42 ++++++++++ 4 files changed, 128 insertions(+), 54 deletions(-) diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index 75f690d95bd..face6a88ff3 100644 --- a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -108,10 +108,7 @@ const ( HostedClusterAnnotation = "hypershift.openshift.io/cluster" clusterDeletionRequeueDuration = 5 * time.Second - // Image built from https://github.com/openshift/cluster-api/tree/release-1.0 - // Upstream canonical image comes from https://console.cloud.google.com/gcr/images/k8s-staging-cluster-api/global/ - // us.gcr.io/k8s-artifacts-prod/cluster-api/cluster-api-controller:v1.0.0 - imageCAPI = "registry.ci.openshift.org/hypershift/cluster-api:v1.0.0" + ImageStreamCAPI = "cluster-capi-controllers" ImageStreamAutoscalerImage = "cluster-autoscaler" ImageStreamClusterMachineApproverImage = "cluster-machine-approver" @@ -1604,7 +1601,10 @@ func (r *HostedClusterReconciler) reconcileCAPIManager(ctx context.Context, crea } // Reconcile CAPI manager deployment - capiImage := imageCAPI + capiImage, err := hyperutil.GetPayloadImage(ctx, hcluster, ImageStreamCAPI) + if err != nil { + return fmt.Errorf("failed to retrieve capi image: %w", err) + } if envImage := os.Getenv(images.CAPIEnvVar); len(envImage) > 0 { capiImage = envImage } @@ -1849,8 +1849,8 @@ func servicePublishingStrategyByType(hcp *hyperv1.HostedCluster, svcType hyperv1 // reconcileAutoscaler orchestrates reconciliation of autoscaler components using // both the HostedCluster and the HostedControlPlane which the autoscaler takes // inputs from. 
-func (r *HostedClusterReconciler) reconcileAutoscaler(ctx context.Context, createOrUpdate upsert.CreateOrUpdateFN, hcluster *hyperv1.HostedCluster, hcp *hyperv1.HostedControlPlane, utilitiesImage string) error { - clusterAutoscalerImage, err := r.getPayloadImage(ctx, hcluster, ImageStreamAutoscalerImage) +func (r *HostedClusterReconciler) reconcileAutoscaler(ctx context.Context, createOrUpdate upsert.CreateOrUpdateFN, hcluster *hyperv1.HostedCluster, hcp *hyperv1.HostedControlPlane, utilitiesImage string) error { + clusterAutoscalerImage, err := hyperutil.GetPayloadImage(ctx, hcluster, ImageStreamAutoscalerImage) if err != nil { return fmt.Errorf("failed to get image for machine approver: %w", err) } @@ -2437,7 +2437,7 @@ func reconcileCAPIManagerDeployment(deployment *appsv1.Deployment, hc *hyperv1.H }, }, }, - Command: []string{"/manager"}, + Command: []string{"/bin/cluster-api-controller-manager"}, Args: []string{"--namespace", "$(MY_NAMESPACE)", "--alsologtostderr", "--v=4", @@ -3077,7 +3077,7 @@ func (r *HostedClusterReconciler) reconcileClusterPrometheusRBAC(ctx context.Con } func (r *HostedClusterReconciler) reconcileMachineApprover(ctx context.Context, createOrUpdate upsert.CreateOrUpdateFN, hcluster *hyperv1.HostedCluster, hcp *hyperv1.HostedControlPlane, utilitiesImage string) error { - machineApproverImage, err := r.getPayloadImage(ctx, hcluster, ImageStreamClusterMachineApproverImage) + machineApproverImage, err := hyperutil.GetPayloadImage(ctx, hcluster, ImageStreamClusterMachineApproverImage) if err != nil { return fmt.Errorf("failed to get image for machine approver: %w", err) } @@ -3916,33 +3916,6 @@ func validateClusterID(hc *hyperv1.HostedCluster) error { return nil } -// getReleaseImage get the releaseInfo releaseImage for a given HC release image reference. -func (r *HostedClusterReconciler) getReleaseImage(ctx context.Context, hc *hyperv1.HostedCluster) (*releaseinfo.ReleaseImage, error) { - var pullSecret corev1.Secret - if err := r.Client.Get(ctx, types.NamespacedName{Namespace: hc.Namespace, Name: hc.Spec.PullSecret.Name}, &pullSecret); err != nil { - return nil, fmt.Errorf("failed to get pull secret: %w", err) - } - pullSecretBytes, ok := pullSecret.Data[corev1.DockerConfigJsonKey] - if !ok { - return nil, fmt.Errorf("expected %s key in pull secret", corev1.DockerConfigJsonKey) - } - return r.ReleaseProvider.Lookup(ctx, hc.Spec.Release.Image, pullSecretBytes) -} - -// getPayloadImage get an image from the payload for a particular component. 
-func (r *HostedClusterReconciler) getPayloadImage(ctx context.Context, hc *hyperv1.HostedCluster, component string) (string, error) { - releaseImage, err := r.getReleaseImage(ctx, hc) - if err != nil { - return "", fmt.Errorf("failed to lookup release image: %w", err) - } - - image, exists := releaseImage.ComponentImages()[component] - if !exists { - return "", fmt.Errorf("image does not exist for release: %q", image) - } - return image, nil -} - func (r *HostedClusterReconciler) reconcileServiceAccountSigningKey(ctx context.Context, hc *hyperv1.HostedCluster, targetNamespace string, createOrUpdate upsert.CreateOrUpdateFN) error { privateBytes, publicBytes, err := r.serviceAccountSigningKeyBytes(ctx, hc) if err != nil { diff --git a/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws.go b/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws.go index 34cbd87253c..212e6615f6f 100644 --- a/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws.go +++ b/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws.go @@ -25,10 +25,7 @@ import ( ) const ( - // Image built from https://github.com/openshift/cluster-api-provider-aws/tree/release-1.1 - // Upstream canonical image comes from https://console.cloud.google.com/gcr/images/k8s-artifacts-prod - // us.gcr.io/k8s-artifacts-prod/cluster-api-aws/cluster-api-aws-controller:v1.1.0 - imageCAPA = "registry.ci.openshift.org/hypershift/cluster-api-aws-controller:v1.1.0" + ImageStreamCAPA = "aws-cluster-api-controllers" ) func New(utilitiesImage string) *AWS { @@ -63,7 +60,10 @@ func (p AWS) ReconcileCAPIInfraCR(ctx context.Context, c client.Client, createOr } func (p AWS) CAPIProviderDeploymentSpec(hcluster *hyperv1.HostedCluster, hcp *hyperv1.HostedControlPlane) (*appsv1.DeploymentSpec, error) { - providerImage := imageCAPA + providerImage, err := util.GetPayloadImage(context.TODO(), hcluster, ImageStreamCAPA) + if err != nil { + return nil, err + } if envImage := os.Getenv(images.AWSCAPIProviderEnvVar); len(envImage) > 0 { providerImage = envImage } @@ -161,7 +161,7 @@ func (p AWS) CAPIProviderDeploymentSpec(hcluster *hyperv1.HostedCluster, hcp *hy Value: "true", }, }, - Command: []string{"/manager"}, + Command: []string{"/bin/cluster-api-provider-aws-controller-manager"}, Args: []string{"--namespace", "$(MY_NAMESPACE)", "--alsologtostderr", "--v=4", diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go index 5e40eb0b37e..8d4a1acaebe 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go @@ -159,6 +159,24 @@ func (r *NodePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return ctrl.Result{}, fmt.Errorf("failed to remove finalizer from nodepool: %w", err) } } + + //Ensure all machines are gone before deleting related machine secrets + machineSets, err := r.listMachineSets(nodePool) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to get list of machines from nodepool: %w", err) + } + for len(machineSets) > 0 { + machineSets, err = r.listMachineSets(nodePool) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to get list of machines from nodepool during machine set check: %w", err) + } + } + + // Delete all secrets related to the NodePool + if err := r.deleteNodePoolSecrets(ctx, nodePool); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to delete NodePool 
secrets: %w", err) + } + log.Info("Deleted nodepool", "name", req.NamespacedName) return ctrl.Result{}, nil } @@ -881,17 +899,6 @@ func (r *NodePoolReconciler) delete(ctx context.Context, nodePool *hyperv1.NodeP } } - // Delete any secret belonging to this NodePool i.e token Secret and userdata Secret. - secrets, err := r.listSecrets(ctx, nodePool) - if err != nil { - return fmt.Errorf("failed to list secrets: %w", err) - } - for k := range secrets { - if err := r.Delete(ctx, &secrets[k]); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("failed to delete secret: %w", err) - } - } - // Delete any ConfigMap belonging to this NodePool i.e. TunedConfig ConfigMaps. err = r.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace(controlPlaneNamespace), @@ -915,6 +922,20 @@ func (r *NodePoolReconciler) delete(ctx context.Context, nodePool *hyperv1.NodeP return nil } +func (r *NodePoolReconciler) deleteNodePoolSecrets(ctx context.Context, nodePool *hyperv1.NodePool) error { + // Delete any secret belonging to this NodePool i.e token Secret and userdata Secret. + secrets, err := r.listSecrets(ctx, nodePool) + if err != nil { + return fmt.Errorf("failed to list secrets: %w", err) + } + for k := range secrets { + if err := r.Delete(ctx, &secrets[k]); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to delete secret: %w", err) + } + } + return nil +} + func reconcileUserDataSecret(userDataSecret *corev1.Secret, nodePool *hyperv1.NodePool, CA, token []byte, ignEndpoint string, proxy *configv1.Proxy) error { // The token secret controller deletes expired token Secrets. // When that happens the NodePool controller reconciles and create a new one. @@ -1047,7 +1068,6 @@ func (r *NodePoolReconciler) reconcileMachineDeployment(log logr.Logger, nodePoolAnnotation: client.ObjectKeyFromObject(nodePool).String(), }, }, - Spec: capiv1.MachineSpec{ ClusterName: CAPIClusterName, Bootstrap: capiv1.Bootstrap{ @@ -1764,6 +1784,45 @@ func (r *NodePoolReconciler) listSecrets(ctx context.Context, nodePool *hyperv1. return filtered, nil } +func (r *NodePoolReconciler) listMachineSets(nodePool *hyperv1.NodePool) ([]client.Object, error) { + machineSetList := &unstructured.UnstructuredList{} + + var gvk schema.GroupVersionKind + var err error + + switch nodePool.Spec.Platform.Type { + // Define the desired template type and mutateTemplate function. 
+ case hyperv1.AWSPlatform: + gvk, err = apiutil.GVKForObject(&capiaws.AWSMachineList{}, api.Scheme) + if err != nil { + return nil, err + } + default: + // need a default path that returns a value that does not cause the hypershift operator to crash + // if no explicit machineTemplate is defined safe to assume none exist + return nil, nil + } + machineSetList.SetGroupVersionKind(schema.GroupVersionKind{ + Group: gvk.Group, + Kind: gvk.Kind, + Version: gvk.Version, + }) + if err := r.List(context.Background(), machineSetList); err != nil { + return nil, fmt.Errorf("failed to list MachineSets: %w", err) + } + var filtered []client.Object + for i, machineTemplate := range machineSetList.Items { + if machineTemplate.GetAnnotations() != nil { + if annotation, ok := machineTemplate.GetAnnotations()[nodePoolAnnotation]; ok && + annotation == client.ObjectKeyFromObject(nodePool).String() { + filtered = append(filtered, &machineSetList.Items[i]) + } + } + } + + return filtered, nil +} + func (r *NodePoolReconciler) listMachineTemplates(nodePool *hyperv1.NodePool) ([]client.Object, error) { machineTemplateList := &unstructured.UnstructuredList{} diff --git a/support/util/imagemetadata.go b/support/util/imagemetadata.go index 23327256f97..d8c96967c3a 100644 --- a/support/util/imagemetadata.go +++ b/support/util/imagemetadata.go @@ -7,13 +7,18 @@ import ( "github.com/docker/distribution/registry/client/transport" "github.com/golang/groupcache/lru" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/rest" + hyperv1 "github.com/openshift/hypershift/api/v1alpha1" + cmdUtil "github.com/openshift/hypershift/cmd/util" + "github.com/openshift/hypershift/support/releaseinfo" "github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/dockerv1client" "github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/reference" "github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/registryclient" "github.com/openshift/hypershift/support/thirdparty/oc/pkg/cli/image/manifest" "github.com/openshift/hypershift/support/thirdparty/oc/pkg/cli/image/manifest/dockercredentials" + corev1 "k8s.io/api/core/v1" ) var ( @@ -92,3 +97,40 @@ func ImageLabels(metadata *dockerv1client.DockerImageConfig) map[string]string { return metadata.ContainerConfig.Labels } } + +// getPullSecretFromHostedCluster gets the pull secret from the hosted cluster +func getPullSecretFromHostedCluster(ctx context.Context, hc *hyperv1.HostedCluster) ([]byte, error) { + var pullSecret corev1.Secret + c, err := cmdUtil.GetClient() + if err != nil { + return nil, fmt.Errorf("failed to setup client in CAPI: %w", err) + } + if err := c.Get(ctx, types.NamespacedName{Namespace: hc.Namespace, Name: hc.Spec.PullSecret.Name}, &pullSecret); err != nil { + return nil, fmt.Errorf("failed to get pull secret: %w", err) + } + pullSecretBytes, ok := pullSecret.Data[corev1.DockerConfigJsonKey] + if !ok { + return nil, fmt.Errorf("expected %s key in pull secret", corev1.DockerConfigJsonKey) + } + return pullSecretBytes, nil +} + +// GetPayloadImage get an image from the payload for a particular component +func GetPayloadImage(ctx context.Context, hc *hyperv1.HostedCluster, component string) (string, error) { + pullSecretBytes, err := getPullSecretFromHostedCluster(context.TODO(), hc) + if err != nil { + return "", fmt.Errorf("failed to get pull secret to retrieve cluster-autoscaler component image: %w", err) + } + + riprovider := &releaseinfo.RegistryClientProvider{} + releaseImage, err := releaseinfo.Provider.Lookup(riprovider, 
+		ctx, hc.Spec.Release.Image, pullSecretBytes)
+	if err != nil {
+		return "", fmt.Errorf("failed to lookup release image: %w", err)
+	}
+
+	image, exists := releaseImage.ComponentImages()[component]
+	if !exists {
+		return "", fmt.Errorf("image does not exist in release payload for component %q", component)
+	}
+	return image, nil
+}
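
A minimal caller-side sketch of the pattern this patch standardizes on: resolving a component image from the HostedCluster's release payload via util.GetPayloadImage instead of a hardcoded pullspec. The package name "example" and the helper capiImageFor are illustrative only; hyperutil.GetPayloadImage, the hyperv1 import path, and the "cluster-capi-controllers" component name (ImageStreamCAPI) are taken from the diff above. Callers in the diff additionally honor an environment-variable override (images.CAPIEnvVar / images.AWSCAPIProviderEnvVar), which is omitted here.

    package example

    import (
    	"context"
    	"fmt"

    	hyperv1 "github.com/openshift/hypershift/api/v1alpha1"
    	hyperutil "github.com/openshift/hypershift/support/util"
    )

    // capiImageFor resolves the CAPI controller image for a HostedCluster from
    // its release payload, using the component name this patch introduces as
    // ImageStreamCAPI. The same call shape applies to the CAPA provider
    // ("aws-cluster-api-controllers") and the autoscaler/machine-approver images.
    func capiImageFor(ctx context.Context, hc *hyperv1.HostedCluster) (string, error) {
    	image, err := hyperutil.GetPayloadImage(ctx, hc, "cluster-capi-controllers")
    	if err != nil {
    		return "", fmt.Errorf("failed to retrieve capi image: %w", err)
    	}
    	return image, nil
    }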