From fe3294ac21930ba85ade09498c92be00f3b5a985 Mon Sep 17 00:00:00 2001
From: David
Date: Wed, 8 Nov 2023 13:45:03 -0500
Subject: [PATCH] controller: add ver check before boot image update

---
 pkg/controller/common/constants.go |  6 +++
 pkg/operator/sync.go               | 70 ++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/pkg/controller/common/constants.go b/pkg/controller/common/constants.go
index cfce06e252..484543cca6 100644
--- a/pkg/controller/common/constants.go
+++ b/pkg/controller/common/constants.go
@@ -67,4 +67,10 @@ const (
 
 	// BootImagesConfigMapName is a Configmap of golden bootimages, updated by CVO on an upgrade
 	BootImagesConfigMapName = "coreos-bootimages"
+
+	// MCOVersionHashKey is the key for indexing the MCO git version hash stored in the bootimages configmap
+	MCOVersionHashKey = "MCOVersionHash"
+
+	// MCOReleaseImageVersionKey is the key for indexing the MCO release version stored in the bootimages configmap
+	MCOReleaseImageVersionKey = "MCOReleaseImageVersion"
 )
diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go
index 6895ebf78d..7b45bd1b58 100644
--- a/pkg/operator/sync.go
+++ b/pkg/operator/sync.go
@@ -1265,6 +1265,11 @@ func (optr *Operator) syncRequiredMachineConfigPools(_ *renderConfig) error {
 				}
 				releaseVersion, _ := optr.vStore.Get("operator")
 
+				// Calling this on a "required" pool for now
+				if err := optr.stampBootImagesCM(pool); err != nil {
+					klog.Errorf("Failed to stamp bootimages configmap: %v", err)
+				}
+
 				if err := isMachineConfigPoolConfigurationValid(pool, version.Hash, releaseVersion, opURL, optr.mcLister.Get); err != nil {
 					lastErr = fmt.Errorf("MachineConfigPool %s has not progressed to latest configuration: %w, retrying", pool.Name, err)
 					syncerr := optr.syncUpgradeableStatus()
@@ -1465,6 +1470,71 @@ func (optr *Operator) getCAsFromConfigMap(namespace, name, key string) ([]byte,
 	return getCAsFromConfigMap(cm, key)
 }
 
+// stampBootImagesCM stamps the current operator version and commit hash in the boot images configmap
+// that lives in the MCO namespace. Before doing so, it ensures that the pool is targeting an MC
+// that is generated by the current version of the MCO and that the pool has at least 1 upgraded node (or
+// has completed an upgrade). This "stamp" is used by the machine set controller as a safety before
+// it updates boot images.
+//
+// It returns nil without touching the configmap when the pool is not yet eligible or when the
+// configmap already carries the current stamp; lookup and update failures are returned wrapped.
+func (optr *Operator) stampBootImagesCM(pool *mcfgv1.MachineConfigPool) error {
+	// Ensure the targeted MC for this pool was generated by the current MCO
+	renderedMC, err := optr.mcLister.Get(pool.Spec.Configuration.Name)
+	if err != nil {
+		return fmt.Errorf("failed to grab rendered MC %s, error: %w", pool.Spec.Configuration.Name, err)
+	}
+	if renderedMC.Annotations[ctrlcommon.ReleaseImageVersionAnnotationKey] != version.ReleaseVersion {
+		klog.V(4).Infof("rendered MC release version %s mismatch with operator release version %s", renderedMC.Annotations[ctrlcommon.ReleaseImageVersionAnnotationKey], version.ReleaseVersion)
+		return nil
+	}
+	if renderedMC.Annotations[ctrlcommon.GeneratedByControllerVersionAnnotationKey] != version.Hash {
+		klog.V(4).Infof("rendered MC commit hash %s mismatch with operator release commit hash %s", renderedMC.Annotations[ctrlcommon.GeneratedByControllerVersionAnnotationKey], version.Hash)
+		return nil
+	}
+
+	// Check if the pool has at least one updated node (mid-upgrade), or if the pool has completed the
+	// upgrade to the new config. The additional check for spec==status is to ensure we are not looking
+	// at an older "Updated" condition while the MCP fields haven't caught up yet.
+	midUpgrade := apihelpers.IsMachineConfigPoolConditionTrue(pool.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) && pool.Status.UpdatedMachineCount > 0
+	upgradeDone := apihelpers.IsMachineConfigPoolConditionTrue(pool.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) && pool.Spec.Configuration.Name == pool.Status.Configuration.Name
+	if !midUpgrade && !upgradeDone {
+		return nil
+	}
+
+	cachedCM, err := optr.clusterCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName)
+	if err != nil {
+		return fmt.Errorf("failed to grab boot images configmap: %w", err)
+	}
+
+	// No need to update if the existing versions are a match, exit
+	storedVersionHashFromCM, storedVersionHashFound := cachedCM.Data[ctrlcommon.MCOVersionHashKey]
+	releaseVersionFromCM, releaseVersionFound := cachedCM.Data[ctrlcommon.MCOReleaseImageVersionKey]
+	if storedVersionHashFound && releaseVersionFound &&
+		storedVersionHashFromCM == version.Hash && releaseVersionFromCM == version.ReleaseVersion {
+		return nil
+	}
+
+	// Objects returned by a lister belong to the shared informer cache and must be treated as
+	// read-only, so stamp a deep copy rather than the cached object.
+	cm := cachedCM.DeepCopy()
+	if cm.Data == nil {
+		// Assigning into a nil map panics, so make sure Data is initialized before stamping.
+		cm.Data = map[string]string{}
+	}
+
+	// Stamp the configmap with newest commit hash and OCP release version
+	cm.Data[ctrlcommon.MCOVersionHashKey] = version.Hash
+	cm.Data[ctrlcommon.MCOReleaseImageVersionKey] = version.ReleaseVersion
+
+	// Update the ConfigMap
+	if _, err := optr.kubeClient.CoreV1().ConfigMaps(ctrlcommon.MCONamespace).Update(context.TODO(), cm, metav1.UpdateOptions{}); err != nil {
+		return fmt.Errorf("failed to update bootimages configmap %w", err)
+	}
+	klog.Infof("Stamped boot images configmap with %s and %s, machine pool updated count: %d", version.Hash, version.ReleaseVersion, pool.Status.UpdatedMachineCount)
+	return nil
+}
+
 func getCAsFromConfigMap(cm *corev1.ConfigMap, key string) ([]byte, error) {
 	if bd, bdok := cm.BinaryData[key]; bdok {
 		return bd, nil