From 38e721bc2451738b3568c2fa1ca7a4d470e60ebc Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Wed, 19 Jul 2023 20:02:45 +0800 Subject: [PATCH] [ctrmgr]: Container image clean up bug fix (#15772) (#15870) --- src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py | 8 ++- src/sonic-ctrmgrd/ctrmgr/kube_commands.py | 49 +++++++++++-------- src/sonic-ctrmgrd/tests/kube_commands_test.py | 23 ++++++++- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py index 9bf12f4a8351..ea2db99f0258 100755 --- a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py +++ b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py @@ -581,7 +581,7 @@ def on_state_update(self, key, op, data): def do_tag_latest(self, feat, docker_id, image_ver): ret = kube_commands.tag_latest(feat, docker_id, image_ver) - if ret != 0: + if ret == 1: # Tag latest failed. Retry after an interval self.start_time = datetime.datetime.now() self.start_time += datetime.timedelta( @@ -590,7 +590,7 @@ def do_tag_latest(self, feat, docker_id, image_ver): log_debug("Tag latest as local failed retry after {} seconds @{}". format(remote_ctr_config[TAG_RETRY], self.start_time)) - else: + elif ret == 0: last_version = self.st_data[feat][ST_FEAT_CTR_STABLE_VER] if last_version == image_ver: last_version = self.st_data[feat][ST_FEAT_CTR_LAST_VER] @@ -600,6 +600,10 @@ def do_tag_latest(self, feat, docker_id, image_ver): self.st_data[ST_FEAT_CTR_LAST_VER] = last_version self.st_data[ST_FEAT_CTR_STABLE_VER] = image_ver self.do_clean_image(feat, image_ver, last_version) + elif ret == -1: + # This means the container we want to tag latest is not running + # so we don't need to do clean up + pass def do_clean_image(self, feat, current_version, last_version): ret = kube_commands.clean_image(feat, current_version, last_version) diff --git a/src/sonic-ctrmgrd/ctrmgr/kube_commands.py b/src/sonic-ctrmgrd/ctrmgr/kube_commands.py index 8a8aad41bc0a..fd63f2cb12bc 100755 --- a/src/sonic-ctrmgrd/ctrmgr/kube_commands.py +++ b/src/sonic-ctrmgrd/ctrmgr/kube_commands.py @@ -478,7 +478,7 @@ def tag_latest(feat, docker_id, image_ver): else: log_error(err) elif ret == -1: - ret = 0 + log_debug(out) else: log_error(err) return ret @@ -487,31 +487,38 @@ def _do_clean(feat, current_version, last_version): err = "" out = "" ret = 0 - DOCKER_ID = "docker_id" + IMAGE_ID = "image_id" REPO = "repo" _, image_info, err = _run_command("docker images |grep {} |grep -v latest |awk '{{print $1,$2,$3}}'".format(feat)) if image_info: - version_dict = {} - version_dict_default = {} + remote_image_version_dict = {} + local_image_version_dict = {} for info in image_info.split("\n"): - rep, version, docker_id = info.split() + rep, version, image_id = info.split() if len(rep.split("/")) == 1: - version_dict_default[version] = {DOCKER_ID: docker_id, REPO: rep} + local_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep} else: - version_dict[version] = {DOCKER_ID: docker_id, REPO: rep} + remote_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep} - if current_version in version_dict: - image_prefix = version_dict[current_version][REPO] - del version_dict[current_version] + if current_version in remote_image_version_dict: + image_prefix = remote_image_version_dict[current_version][REPO] + del remote_image_version_dict[current_version] else: out = "Current version {} doesn't exist.".format(current_version) ret = 0 return ret, out, err - # should be only one item in version_dict_default - for k, v in version_dict_default.items(): - local_version, local_repo, local_docker_id = k, v[REPO], v[DOCKER_ID] - tag_res, _, err = _run_command("docker tag {} {}:{} && docker rmi {}:{}".format( - local_docker_id, image_prefix, local_version, local_repo, local_version)) + # should be only one item in local_image_version_dict + for k, v in local_image_version_dict.items(): + local_version, local_repo, local_image_id = k, v[REPO], v[IMAGE_ID] + # if there is a kube image with same version, need to remove the kube version + # and tag the local version to kube version for fallback preparation + # and remove the local version + if local_version in remote_image_version_dict: + tag_res, _, err = _run_command("docker rmi {}:{} && docker tag {} {}:{} && docker rmi {}:{}".format( + image_prefix, local_version, local_image_id, image_prefix, local_version, local_repo, local_version)) + # if there is no kube image with same version, just remove the local version + else: + tag_res, _, err = _run_command("docker rmi {}:{}".format(local_repo, local_version)) if tag_res == 0: msg = "Tag {} local version images successfully".format(feat) log_debug(msg) @@ -520,12 +527,12 @@ def _do_clean(feat, current_version, last_version): err = "Failed to tag {} local version images. Err: {}".format(feat, err) return ret, out, err - if last_version in version_dict: - del version_dict[last_version] + if last_version in remote_image_version_dict: + del remote_image_version_dict[last_version] - versions = [item[DOCKER_ID] for item in version_dict.values()] - if versions: - clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(versions))) + image_id_remove_list = [item[IMAGE_ID] for item in remote_image_version_dict.values()] + if image_id_remove_list: + clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(image_id_remove_list))) else: clean_res = 0 if clean_res == 0: @@ -534,7 +541,7 @@ def _do_clean(feat, current_version, last_version): err = "Failed to clean {} old version images. Err: {}".format(feat, err) ret = 1 else: - err = "Failed to docker images |grep {} |awk '{{print $3}}'".format(feat) + err = "Failed to docker images |grep {} |awk '{{print $3}}'. Error: {}".format(feat, err) ret = 1 return ret, out, err diff --git a/src/sonic-ctrmgrd/tests/kube_commands_test.py b/src/sonic-ctrmgrd/tests/kube_commands_test.py index 62a0b5053b9e..4c0b3b24dc1a 100755 --- a/src/sonic-ctrmgrd/tests/kube_commands_test.py +++ b/src/sonic-ctrmgrd/tests/kube_commands_test.py @@ -266,7 +266,7 @@ }, 2: { common_test.DESCR: "Tag a unstable container", - common_test.RETVAL: 0, + common_test.RETVAL: -1, common_test.ARGS: ["snmp", "123456", "v1"], common_test.PROC_CMD: [ "docker ps |grep 123456" @@ -382,7 +382,7 @@ common_test.ARGS: ["snmp", "20201231.84", ""], common_test.PROC_CMD: [ "docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'", - "docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74" + "docker rmi docker-sonic-telemetry:20201231.74" ], common_test.PROC_OUT: [ "docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\ @@ -394,6 +394,25 @@ 0 ] }, + 5: { + common_test.DESCR: "Clean image successfuly(local to dry-kube to kube)", + common_test.RETVAL: 0, + common_test.ARGS: ["snmp", "20201231.84", "20201231.74"], + common_test.PROC_CMD: [ + "docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'", + "docker rmi sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74" + ], + common_test.PROC_OUT: [ + "docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\ + sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.74 507f8d28bf6f\n\ + sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.84 507f8d28bf6g", + "" + ], + common_test.PROC_CODE: [ + 0, + 0 + ] + }, } class TestKubeCommands(object):