From 9f2bc830597bb823c793961a028fcf59c7970c04 Mon Sep 17 00:00:00 2001
From: Gleb Aronsky
Date: Mon, 11 Sep 2023 07:21:54 -0700
Subject: [PATCH] Gracefully stop the isolcpu and kubelet service

This change ensures that the isolcpu_plugin service is stopped and
masked prior to masking and stopping the kubelet service. Additionally,
on startup, the kubelet service is unmasked and started prior to
unmasking isolcpu_plugin. This change is intended to avoid any race
conditions that can occur because of the dependency on kubelet by
isolcpu_plugin, resulting in numerous restarts of both services and
leading to failed node upgrades.

Test Plan:
  - PASS: Upgrade kubelet AIO-SX
  - PASS: Upgrade kubelet on a STANDARD installation

Closes-Bug: 2036985
Change-Id: Ifb2b512c3953d2a1f7efdba289a31d5a9315cae4
Signed-off-by: Gleb Aronsky
---
 .../modules/platform/manifests/kubernetes.pp  | 64 ++++++++++++++++---
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
index a0a0b812..f305675b 100644
--- a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
+++ b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
@@ -864,15 +864,54 @@
   }
 }
 
+# Define for unmasking and starting a service
+#
+# $service_name - unit handed to 'systemctl unmask' and 'pmon-start'
+# $onlyif       - optional guard command applied to both execs; leaving
+#                 it undef leaves the execs unguarded
+define platform::kubernetes::unmask_start_service($service_name, $onlyif = undef) {
+  # Unmask the service and start it now
+  exec { "unmask ${service_name}":
+    command => "/usr/bin/systemctl unmask --runtime ${service_name}",
+    onlyif  => $onlyif,
+  }
+  # Tell pmon to start monitoring the service
+  -> exec { "start ${service_name} for upgrade":
+    command => "/usr/local/sbin/pmon-start ${service_name}",
+    onlyif  => $onlyif,
+  }
+}
+
+# Define for masking and stopping a service
+#
+# $service_name - unit handed to 'systemctl mask' and 'pmon-stop'
+# $onlyif       - optional guard command applied to both execs; leaving
+#                 it undef leaves the execs unguarded (kubelet relies on this)
+define platform::kubernetes::mask_stop_service($service_name, $onlyif = undef) {
+  # Mask the service and stop it now
+  exec { "mask ${service_name}":
+    command => "/usr/bin/systemctl mask --runtime --now ${service_name}",
+    onlyif  => $onlyif,
+  }
+  # Tell pmon to stop the service so it doesn't try to restart it
+  -> exec { "stop ${service_name} for upgrade":
+    command => "/usr/local/sbin/pmon-stop ${service_name}",
+    onlyif  => $onlyif,
+  }
+}
+
 class platform::kubernetes::mask_stop_kubelet {
+  # Mask and stop isolcpu_plugin service first if it is configured to run
+  # on this node
+  platform::kubernetes::mask_stop_service { 'isolcpu_plugin':
+    service_name => 'isolcpu_plugin',
+    onlyif       => 'systemctl is-enabled isolcpu_plugin.service | grep -wq enabled',
+  }
+
   # Mask restarting kubelet and stop it now so that we can unmount
   # and re-mount the bind mount.
-  exec { 'mask kubelet for master upgrade':
-    command => '/usr/bin/systemctl mask --runtime --now kubelet',
-  }
-  # Tell pmon to stop kubelet so it doesn't try to restart it
-  -> exec { 'stop kubelet for upgrade':
-    command => '/usr/local/sbin/pmon-stop kubelet',
+  -> platform::kubernetes::mask_stop_service { 'kubelet':
+    service_name => 'kubelet',
   }
 }
 
@@ -911,11 +950,16 @@
     timeout => 10,
   }
   # Unmask and restart kubelet after the bind mount is updated.
-  -> exec { 'unmask kubelet for upgrade':
-    command => '/usr/bin/systemctl unmask --runtime kubelet',
+  -> platform::kubernetes::unmask_start_service { 'kubelet':
+    service_name => 'kubelet',
   }
-  -> exec { 'start kubelet':
-    command => '/usr/local/sbin/pmon-start kubelet'
+
+  # Unmask and start isolcpu_plugin service last
+  # NOTE(review): systemctl is-enabled exits non-zero for a runtime-masked
+  # unit; confirm this guard still passes when the mask was applied this boot.
+  -> platform::kubernetes::unmask_start_service { 'isolcpu_plugin':
+    service_name => 'isolcpu_plugin',
+    onlyif       => 'systemctl is-enabled isolcpu_plugin',
   }
 }
 