From 00f655a4b7d551c7e5730afc65d7d07c26aeac64 Mon Sep 17 00:00:00 2001 From: lixiaoyuner <35456895+lixiaoyuner@users.noreply.github.com> Date: Sat, 15 Jul 2023 00:15:20 +0800 Subject: [PATCH] [ctgmgr]: do not remove label when do systemd service stop when service is in kube mode (#15642) Why I did it When SONiC is managed by k8s, the SONiC container is managed by a k8s daemonset, and the daemonset identifies its members by labels. Currently, when a SONiC service is restarted via systemctl and the service's container is already managed by k8s, the systemd script stops the container by removing the feature label so that it leaves the k8s daemonset, and then starts it by adding the label back so that it joins the k8s daemonset again. This behavior causes a problem during a k8s container upgrade. Containers in a daemonset are upgraded in a rolling fashion: the daemonset version is updated first, and then the new version is rolled out to the containers one by one, with a precheck/postcheck for each. However, when a SONiC device joins a daemonset, k8s directly deploys a pod with the daemonset's current version. That is expected when a device joins the k8s cluster for the first time. But for a device that has already joined the k8s cluster, re-joining the daemonset causes the container to be upgraded to the new version without a precheck. So if a systemd service is restarted during a daemonset upgrade, the container may be upgraded without a precheck, breaking the rolling update policy. To fix this, we need to remove the logic that drops the k8s label in the systemd service stop script for kube mode. Work item tracking Microsoft ADO (number only): 24304563 How I did it Don't drop the label in the systemd service stop script when the feature's set_owner is kube. Only drop the label when the feature's set_owner is local. How to verify it The label <feature>_enabled should always be true if the feature's set_owner is kube. 
--- src/sonic-ctrmgrd/ctrmgr/container | 2 +- src/sonic-ctrmgrd/tests/container_test.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/sonic-ctrmgrd/ctrmgr/container b/src/sonic-ctrmgrd/ctrmgr/container index 46af96ac3dd8..f3c8e171e288 100755 --- a/src/sonic-ctrmgrd/ctrmgr/container +++ b/src/sonic-ctrmgrd/ctrmgr/container @@ -288,7 +288,7 @@ def container_stop(feature, **kwargs): set_owner, _ , _ = read_config(feature) current_owner, remote_state, _ = read_state(feature) docker_id = container_id(feature) - remove_label = (remote_state != "pending") or (set_owner == "local") + remove_label = (set_owner == "local") debug_msg("{}: set_owner:{} current_owner:{} remote_state:{} docker_id:{}".format( feature, set_owner, current_owner, remote_state, docker_id)) diff --git a/src/sonic-ctrmgrd/tests/container_test.py b/src/sonic-ctrmgrd/tests/container_test.py index ad4a3a2f00b6..8866fdfa4a3d 100755 --- a/src/sonic-ctrmgrd/tests/container_test.py +++ b/src/sonic-ctrmgrd/tests/container_test.py @@ -244,11 +244,6 @@ "container_id": "", "container_version": "20201230.1.15" } - }, - common_test.KUBE_LABEL_TABLE: { - "SET": { - "snmp_enabled": "false" - } } } },