diff --git a/changelogs/fragments/500-idempotent-image-archival.yaml b/changelogs/fragments/500-idempotent-image-archival.yaml new file mode 100644 index 000000000..1506f376d --- /dev/null +++ b/changelogs/fragments/500-idempotent-image-archival.yaml @@ -0,0 +1,4 @@ +minor_changes: + - docker_image - when using ``archive_path``, detect whether changes are necessary based on the image ID (hash). If the existing tar archive matches the source, do nothing. + Previously, each task execution re-created the archive + (https://github.com/ansible-collections/community.docker/pull/500). \ No newline at end of file diff --git a/plugins/module_utils/image_archive.py b/plugins/module_utils/image_archive.py new file mode 100644 index 000000000..227d9460a --- /dev/null +++ b/plugins/module_utils/image_archive.py @@ -0,0 +1,159 @@ +# Copyright 2022 Red Hat | Ansible +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import os + +import json +import tarfile + +from ansible.module_utils.common.text.converters import to_native + + +class ImageArchiveManifestSummary(object): + """ + Represents data extracted from a manifest.json found in the tar archive output of the + "docker image save some:tag > some.tar" command. + """ + + def __init__(self, image_id, repo_tags): + """ + :param image_id: File name portion of Config entry, e.g. abcde12345 from abcde12345.json + :type image_id: str + :param repo_tags Docker image names, e.g. ["hello-world:latest"] + :type repo_tags: list + """ + + self.image_id = image_id + self.repo_tags = repo_tags + + +class ImageArchiveInvalidException(Exception): + def __init__(self, message, cause): + """ + :param message: Exception message + :type message: str + :param cause: Inner exception that this exception wraps + :type cause: Exception + """ + + super(ImageArchiveInvalidException, self).__init__(message) + + # Python 2 doesn't support causes + self.cause = cause + + +def api_image_id(archive_image_id): + """ + Accepts an image hash in the format stored in manifest.json, and returns an equivalent identifier + that represents the same image hash, but in the format presented by the Docker Engine API. + + :param archive_image_id: plain image hash + :type archive_image_id: str + + :returns: Prefixed hash used by REST api + :rtype: str + """ + + return 'sha256:%s' % archive_image_id + + +def archived_image_manifest(archive_path): + """ + Attempts to get Image.Id and image name from metadata stored in the image + archive tar file. + + The tar should contain a file "manifest.json" with an array with a single entry, + and the entry should have a Config field with the image ID in its file name, as + well as a RepoTags list, which typically has only one entry. + + Raises: + ImageArchiveInvalidException if a file already exists at archive_path, but + we could not extract an image ID from it. + + :param archive_path: Tar file to read + :type archive_path: str + + :return: None, if no file at archive_path, or the extracted image ID, which will not have a sha256: prefix. + :rtype: ImageArchiveManifestSummary + """ + + try: + # FileNotFoundError does not exist in Python 2 + if not os.path.isfile(archive_path): + return None + + tf = tarfile.open(archive_path, 'r') + try: + try: + ef = tf.extractfile('manifest.json') + try: + text = ef.read().decode('utf-8') + manifest = json.loads(text) + except Exception as exc: + raise ImageArchiveInvalidException( + "Failed to decode and deserialize manifest.json: %s" % to_native(exc), + exc + ) + finally: + # In Python 2.6, this does not have __exit__ + ef.close() + + if len(manifest) != 1: + raise ImageArchiveInvalidException( + "Expected to have one entry in manifest.json but found %s" % len(manifest), + None + ) + + m0 = manifest[0] + + try: + config_file = m0['Config'] + except KeyError as exc: + raise ImageArchiveInvalidException( + "Failed to get Config entry from manifest.json: %s" % to_native(exc), + exc + ) + + # Extracts hash without 'sha256:' prefix + try: + # Strip off .json filename extension, leaving just the hash. + image_id = os.path.splitext(config_file)[0] + except Exception as exc: + raise ImageArchiveInvalidException( + "Failed to extract image id from config file name %s: %s" % (config_file, to_native(exc)), + exc + ) + + try: + repo_tags = m0['RepoTags'] + except KeyError as exc: + raise ImageArchiveInvalidException( + "Failed to get RepoTags entry from manifest.json: %s" % to_native(exc), + exc + ) + + return ImageArchiveManifestSummary( + image_id=image_id, + repo_tags=repo_tags + ) + + except ImageArchiveInvalidException: + raise + except Exception as exc: + raise ImageArchiveInvalidException( + "Failed to extract manifest.json from tar file %s: %s" % (archive_path, to_native(exc)), + exc + ) + + finally: + # In Python 2.6, TarFile does not have __exit__ + tf.close() + + except ImageArchiveInvalidException: + raise + except Exception as exc: + raise ImageArchiveInvalidException("Failed to open tar file %s: %s" % (archive_path, to_native(exc)), exc) diff --git a/plugins/modules/docker_image.py b/plugins/modules/docker_image.py index 24713c1d9..519c67e79 100644 --- a/plugins/modules/docker_image.py +++ b/plugins/modules/docker_image.py @@ -343,6 +343,12 @@ AnsibleDockerClient, RequestException, ) + +from ansible_collections.community.docker.plugins.module_utils.image_archive import ( + archived_image_manifest, + api_image_id, + ImageArchiveInvalidException, +) from ansible_collections.community.docker.plugins.module_utils.util import ( clean_dict_booleans_for_docker_api, DockerBaseClass, @@ -373,6 +379,15 @@ class ImageManager(DockerBaseClass): def __init__(self, client, results): + """ + Configure a docker_image task. + + :param client: Ansible Docker Client wrapper over Docker client + :type client: AnsibleDockerClient + + :param results: This task adds its output values to this dictionary + :type results: dict + """ super(ImageManager, self).__init__() @@ -527,13 +542,51 @@ def absent(self): self.results['actions'].append("Removed image %s" % (name)) self.results['image']['state'] = 'Deleted' + @staticmethod + def archived_image_action(failure_logger, archive_path, current_image_name, current_image_id): + """ + If the archive is missing or requires replacement, return an action message. + + :param failure_logger: a logging function that accepts one parameter of type str + :type failure_logger: Callable + :param archive_path: Filename to write archive to + :type archive_path: str + :param current_image_name: repo:tag + :type current_image_name: str + :param current_image_id: Hash, including hash type prefix such as "sha256:" + :type current_image_id: str + + :returns: Either None, or an Ansible action message. + :rtype: str + """ + + def build_msg(reason): + return 'Archived image %s to %s, %s' % (current_image_name, archive_path, reason) + + try: + archived = archived_image_manifest(archive_path) + except ImageArchiveInvalidException as exc: + failure_logger('Unable to extract manifest summary from archive: %s' % to_native(exc)) + return build_msg('overwriting an unreadable archive file') + + if archived is None: + return build_msg('since none present') + elif current_image_id == api_image_id(archived.image_id) and [current_image_name] == archived.repo_tags: + return None + else: + name = ', '.join(archived.repo_tags) + + return build_msg('overwriting archive with image %s named %s' % (archived.image_id, name)) + def archive_image(self, name, tag): - ''' + """ Archive an image to a .tar file. Called when archive_path is passed. - :param name - name of the image. Type: str - :return None - ''' + :param name: Name/repository of the image + :type name: str + :param tag: Optional image tag; assumed to be "latest" if None + :type tag: str | None + """ if not tag: tag = "latest" @@ -549,9 +602,17 @@ def archive_image(self, name, tag): self.log("archive image: image %s not found" % image_name) return - self.results['actions'].append('Archived image %s to %s' % (image_name, self.archive_path)) - self.results['changed'] = True - if not self.check_mode: + # Will have a 'sha256:' prefix + image_id = image['Id'] + + action = self.archived_image_action(self.client.module.debug, self.archive_path, image_name, image_id) + + if action: + self.results['actions'].append(action) + + self.results['changed'] = action is not None + + if (not self.check_mode) and self.results['changed']: self.log("Getting archive of image %s" % image_name) try: saved_image = self.client._stream_raw_result( @@ -569,8 +630,7 @@ def archive_image(self, name, tag): except Exception as exc: self.fail("Error writing image archive %s - %s" % (self.archive_path, to_native(exc))) - if image: - self.results['image'] = image + self.results['image'] = image def push_image(self, name, tag=None): ''' diff --git a/tests/integration/targets/docker_image/tasks/tests/options.yml b/tests/integration/targets/docker_image/tasks/tests/options.yml index cf89fb270..401ee7861 100644 --- a/tests/integration/targets/docker_image/tasks/tests/options.yml +++ b/tests/integration/targets/docker_image/tasks/tests/options.yml @@ -7,10 +7,11 @@ set_fact: iname: "{{ name_prefix ~ '-options' }}" iname_1: "{{ name_prefix ~ '-options-1' }}" + hello_world_alt: "{{ name_prefix }}-hello-world-alt:v1.2.3-foo" - name: Registering image name set_fact: - inames: "{{ inames + [iname, iname_1] }}" + inames: "{{ inames + [iname, iname_1, hello_world_alt] }}" #################################################################### ## build.args ###################################################### @@ -235,6 +236,63 @@ source: pull register: archive_image +- assert: + that: + - archive_image is changed + +- name: Copy archive because we will mutate it but other tests need the original + copy: + remote_src: true + src: "{{ remote_tmp_dir }}/image.tar" + dest: "{{ remote_tmp_dir }}/image_mutated.tar" + +- name: Archive image again (idempotent) + docker_image: + name: "{{ docker_test_image_hello_world }}" + archive_path: "{{ remote_tmp_dir }}/image_mutated.tar" + source: local + register: archive_image_2 + +- assert: + that: + - archive_image_2 is not changed + +- name: Archive image 3rd time, should overwrite due to different id + docker_image: + name: "{{ docker_test_image_alpine_different }}" + archive_path: "{{ remote_tmp_dir }}/image_mutated.tar" + source: pull + register: archive_image_3 + +- assert: + that: + - archive_image_3 is changed + +- name: Reset archive + copy: + remote_src: true + src: "{{ remote_tmp_dir }}/image.tar" + dest: "{{ remote_tmp_dir }}/image_mutated.tar" + +- name: Tag image with different name + docker_image: + name: "{{ docker_test_image_hello_world }}" + repository: "{{ hello_world_alt }}" + source: local + +- name: Archive image 4th time, should overwrite due to different name even when ID is same + docker_image: + name: "{{ hello_world_alt }}" + # Tagged as docker_test_image_hello_world but has same hash/id (before this task overwrites it) + archive_path: "{{ remote_tmp_dir }}/image_mutated.tar" + source: local + register: archive_image_4 + +- assert: + that: + - archive_image_4 is changed + +# This is the test that needs the original, non-mutated archive - name: Archive image by ID docker_image: name: "{{ archive_image.image.Id }}" diff --git a/tests/unit/plugins/module_utils/test_image_archive.py b/tests/unit/plugins/module_utils/test_image_archive.py new file mode 100644 index 000000000..558be4f35 --- /dev/null +++ b/tests/unit/plugins/module_utils/test_image_archive.py @@ -0,0 +1,94 @@ +# Copyright 2022 Red Hat | Ansible +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import pytest +import tarfile + +from ansible_collections.community.docker.plugins.module_utils.image_archive import ( + api_image_id, + archived_image_manifest, + ImageArchiveInvalidException +) + +from ..test_support.docker_image_archive_stubbing import ( + write_imitation_archive, + write_imitation_archive_with_manifest, + write_irrelevant_tar, +) + + +@pytest.fixture +def tar_file_name(tmpdir): + """ + Return the name of a non-existing tar file in an existing temporary directory. + """ + + # Cast to str required by Python 2.x + return str(tmpdir.join('foo.tar')) + + +@pytest.mark.parametrize('expected, value', [ + ('sha256:foo', 'foo'), + ('sha256:bar', 'bar') +]) +def test_api_image_id_from_archive_id(expected, value): + assert api_image_id(value) == expected + + +def test_archived_image_manifest_extracts(tar_file_name): + expected_id = "abcde12345" + expected_tags = ["foo:latest", "bar:v1"] + + write_imitation_archive(tar_file_name, expected_id, expected_tags) + + actual = archived_image_manifest(tar_file_name) + + assert actual.image_id == expected_id + assert actual.repo_tags == expected_tags + + +def test_archived_image_manifest_extracts_nothing_when_file_not_present(tar_file_name): + image_id = archived_image_manifest(tar_file_name) + + assert image_id is None + + +def test_archived_image_manifest_raises_when_file_not_a_tar(): + try: + archived_image_manifest(__file__) + raise AssertionError() + except ImageArchiveInvalidException as e: + assert isinstance(e.cause, tarfile.ReadError) + assert str(__file__) in str(e) + + +def test_archived_image_manifest_raises_when_tar_missing_manifest(tar_file_name): + write_irrelevant_tar(tar_file_name) + + try: + archived_image_manifest(tar_file_name) + raise AssertionError() + except ImageArchiveInvalidException as e: + assert isinstance(e.cause, KeyError) + assert 'manifest.json' in str(e.cause) + + +def test_archived_image_manifest_raises_when_manifest_missing_id(tar_file_name): + manifest = [ + { + 'foo': 'bar' + } + ] + + write_imitation_archive_with_manifest(tar_file_name, manifest) + + try: + archived_image_manifest(tar_file_name) + raise AssertionError() + except ImageArchiveInvalidException as e: + assert isinstance(e.cause, KeyError) + assert 'Config' in str(e.cause) diff --git a/tests/unit/plugins/modules/test_docker_image.py b/tests/unit/plugins/modules/test_docker_image.py new file mode 100644 index 000000000..7755c7cc7 --- /dev/null +++ b/tests/unit/plugins/modules/test_docker_image.py @@ -0,0 +1,117 @@ +# Copyright 2022 Red Hat | Ansible +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import pytest + +from ansible_collections.community.docker.plugins.modules.docker_image import ImageManager + +from ansible_collections.community.docker.plugins.module_utils.image_archive import ( + api_image_id, + ImageArchiveInvalidException +) + +from ..test_support.docker_image_archive_stubbing import ( + write_imitation_archive, + write_irrelevant_tar, +) + + +def assert_no_logging(msg): + raise AssertionError('Should not have logged anything but logged %s' % msg) + + +def capture_logging(messages): + def capture(msg): + messages.append(msg) + + return capture + + +@pytest.fixture +def tar_file_name(tmpdir): + """ + Return the name of a non-existing tar file in an existing temporary directory. + """ + + # Cast to str required by Python 2.x + return str(tmpdir.join('foo.tar')) + + +def test_archived_image_action_when_missing(tar_file_name): + fake_name = 'a:latest' + fake_id = 'a1' + + expected = 'Archived image %s to %s, since none present' % (fake_name, tar_file_name) + + actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, fake_name, api_image_id(fake_id)) + + assert actual == expected + + +def test_archived_image_action_when_current(tar_file_name): + fake_name = 'b:latest' + fake_id = 'b2' + + write_imitation_archive(tar_file_name, fake_id, [fake_name]) + + actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, fake_name, api_image_id(fake_id)) + + assert actual is None + + +def test_archived_image_action_when_invalid(tar_file_name): + fake_name = 'c:1.2.3' + fake_id = 'c3' + + write_irrelevant_tar(tar_file_name) + + expected = 'Archived image %s to %s, overwriting an unreadable archive file' % (fake_name, tar_file_name) + + actual_log = [] + actual = ImageManager.archived_image_action( + capture_logging(actual_log), + tar_file_name, + fake_name, + api_image_id(fake_id) + ) + + assert actual == expected + + assert len(actual_log) == 1 + assert actual_log[0].startswith('Unable to extract manifest summary from archive') + + +def test_archived_image_action_when_obsolete_by_id(tar_file_name): + fake_name = 'd:0.0.1' + old_id = 'e5' + new_id = 'd4' + + write_imitation_archive(tar_file_name, old_id, [fake_name]) + + expected = 'Archived image %s to %s, overwriting archive with image %s named %s' % ( + fake_name, tar_file_name, old_id, fake_name + ) + actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, fake_name, api_image_id(new_id)) + + assert actual == expected + + +def test_archived_image_action_when_obsolete_by_name(tar_file_name): + old_name = 'hi' + new_name = 'd:0.0.1' + fake_id = 'd4' + + write_imitation_archive(tar_file_name, fake_id, [old_name]) + + expected = 'Archived image %s to %s, overwriting archive with image %s named %s' % ( + new_name, tar_file_name, fake_id, old_name + ) + actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, new_name, api_image_id(fake_id)) + + print('actual : %s', actual) + print('expected : %s', expected) + assert actual == expected diff --git a/tests/unit/plugins/test_support/docker_image_archive_stubbing.py b/tests/unit/plugins/test_support/docker_image_archive_stubbing.py new file mode 100644 index 000000000..3a5eb8631 --- /dev/null +++ b/tests/unit/plugins/test_support/docker_image_archive_stubbing.py @@ -0,0 +1,76 @@ +# Copyright 2022 Red Hat | Ansible +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import json +import tarfile +from tempfile import TemporaryFile + + +def write_imitation_archive(file_name, image_id, repo_tags): + """ + Write a tar file meeting these requirements: + + * Has a file manifest.json + * manifest.json contains a one-element array + * The element has a Config property with "[image_id].json" as the value name + + :param file_name: Name of file to create + :type file_name: str + :param image_id: Fake sha256 hash (without the sha256: prefix) + :type image_id: str + :param repo_tags: list of fake image:tag's + :type repo_tags: list + """ + + manifest = [ + { + 'Config': '%s.json' % image_id, + 'RepoTags': repo_tags + } + ] + + write_imitation_archive_with_manifest(file_name, manifest) + + +def write_imitation_archive_with_manifest(file_name, manifest): + tf = tarfile.open(file_name, 'w') + try: + with TemporaryFile() as f: + f.write(json.dumps(manifest).encode('utf-8')) + + ti = tarfile.TarInfo('manifest.json') + ti.size = f.tell() + + f.seek(0) + tf.addfile(ti, f) + + finally: + # In Python 2.6, this does not have __exit__ + tf.close() + + +def write_irrelevant_tar(file_name): + """ + Create a tar file that does not match the spec for "docker image save" / "docker image load" commands. + + :param file_name: Name of tar file to create + :type file_name: str + """ + + tf = tarfile.open(file_name, 'w') + try: + with TemporaryFile() as f: + f.write('Hello, world.'.encode('utf-8')) + + ti = tarfile.TarInfo('hi.txt') + ti.size = f.tell() + + f.seek(0) + tf.addfile(ti, f) + + finally: + tf.close()