Skip to content

Commit

Permalink
Make image archive/save idempotent, using image id and repo tags as keys
Browse files Browse the repository at this point in the history
  • Loading branch information
iamjpotts committed Nov 29, 2022
1 parent 70d68dd commit b52cd49
Show file tree
Hide file tree
Showing 7 changed files with 578 additions and 10 deletions.
4 changes: 4 additions & 0 deletions changelogs/fragments/500-idempotent-image-archival.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
minor_changes:
- docker_image - when using ``archive_path``, detect whether changes are necessary based on the image ID (hash). If the existing tar archive matches the source, do nothing.
Previously, each task execution re-created the archive
(https://github.com/ansible-collections/community.docker/pull/500).
159 changes: 159 additions & 0 deletions plugins/module_utils/image_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import os

import json
import tarfile

from ansible.module_utils.common.text.converters import to_native


class ImageArchiveManifestSummary(object):
"""
Represents data extracted from a manifest.json found in the tar archive output of the
"docker image save some:tag > some.tar" command.
"""

def __init__(self, image_id, repo_tags):
"""
:param image_id: File name portion of Config entry, e.g. abcde12345 from abcde12345.json
:type image_id: str
:param repo_tags Docker image names, e.g. ["hello-world:latest"]
:type repo_tags: list
"""

self.image_id = image_id
self.repo_tags = repo_tags


class ImageArchiveInvalidException(Exception):
def __init__(self, message, cause):
"""
:param message: Exception message
:type message: str
:param cause: Inner exception that this exception wraps
:type cause: Exception
"""

super(ImageArchiveInvalidException, self).__init__(message)

# Python 2 doesn't support causes
self.cause = cause


def api_image_id(archive_image_id):
"""
Accepts an image hash in the format stored in manifest.json, and returns an equivalent identifier
that represents the same image hash, but in the format presented by the Docker Engine API.
:param archive_image_id: plain image hash
:type archive_image_id: str
:returns: Prefixed hash used by REST api
:rtype: str
"""

return 'sha256:%s' % archive_image_id


def archived_image_manifest(archive_path):
"""
Attempts to get Image.Id and image name from metadata stored in the image
archive tar file.
The tar should contain a file "manifest.json" with an array with a single entry,
and the entry should have a Config field with the image ID in its file name, as
well as a RepoTags list, which typically has only one entry.
Raises:
ImageArchiveInvalidException if a file already exists at archive_path, but
we could not extract an image ID from it.
:param archive_path: Tar file to read
:type archive_path: str
:return: None, if no file at archive_path, or the extracted image ID, which will not have a sha256: prefix.
:rtype: ImageArchiveManifestSummary
"""

try:
# FileNotFoundError does not exist in Python 2
if not os.path.isfile(archive_path):
return None

tf = tarfile.open(archive_path, 'r')
try:
try:
ef = tf.extractfile('manifest.json')
try:
text = ef.read().decode('utf-8')
manifest = json.loads(text)
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to decode and deserialize manifest.json: %s" % to_native(exc),
exc
)
finally:
# In Python 2.6, this does not have __exit__
ef.close()

if len(manifest) != 1:
raise ImageArchiveInvalidException(
"Expected to have one entry in manifest.json but found %s" % len(manifest),
None
)

m0 = manifest[0]

try:
config_file = m0['Config']
except KeyError as exc:
raise ImageArchiveInvalidException(
"Failed to get Config entry from manifest.json: %s" % to_native(exc),
exc
)

# Extracts hash without 'sha256:' prefix
try:
# Strip off .json filename extension, leaving just the hash.
image_id = os.path.splitext(config_file)[0]
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to extract image id from config file name %s: %s" % (config_file, to_native(exc)),
exc
)

try:
repo_tags = m0['RepoTags']
except KeyError as exc:
raise ImageArchiveInvalidException(
"Failed to get RepoTags entry from manifest.json: %s" % to_native(exc),
exc
)

return ImageArchiveManifestSummary(
image_id=image_id,
repo_tags=repo_tags
)

except ImageArchiveInvalidException:
raise
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to extract manifest.json from tar file %s: %s" % (archive_path, to_native(exc)),
exc
)

finally:
# In Python 2.6, TarFile does not have __exit__
tf.close()

except ImageArchiveInvalidException:
raise
except Exception as exc:
raise ImageArchiveInvalidException("Failed to open tar file %s: %s" % (archive_path, to_native(exc)), exc)
78 changes: 69 additions & 9 deletions plugins/modules/docker_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,12 @@
AnsibleDockerClient,
RequestException,
)

from ansible_collections.community.docker.plugins.module_utils.image_archive import (
archived_image_manifest,
api_image_id,
ImageArchiveInvalidException,
)
from ansible_collections.community.docker.plugins.module_utils.util import (
clean_dict_booleans_for_docker_api,
DockerBaseClass,
Expand Down Expand Up @@ -373,6 +379,15 @@
class ImageManager(DockerBaseClass):

def __init__(self, client, results):
"""
Configure a docker_image task.
:param client: Ansible Docker Client wrapper over Docker client
:type client: AnsibleDockerClient
:param results: This task adds its output values to this dictionary
:type results: dict
"""

super(ImageManager, self).__init__()

Expand Down Expand Up @@ -527,13 +542,51 @@ def absent(self):
self.results['actions'].append("Removed image %s" % (name))
self.results['image']['state'] = 'Deleted'

@staticmethod
def archived_image_action(failure_logger, archive_path, current_image_name, current_image_id):
"""
If the archive is missing or requires replacement, return an action message.
:param failure_logger: a logging function that accepts one parameter of type str
:type failure_logger: Callable
:param archive_path: Filename to write archive to
:type archive_path: str
:param current_image_name: repo:tag
:type current_image_name: str
:param current_image_id: Hash, including hash type prefix such as "sha256:"
:type current_image_id: str
:returns: Either None, or an Ansible action message.
:rtype: str
"""

def build_msg(reason):
return 'Archived image %s to %s, %s' % (current_image_name, archive_path, reason)

try:
archived = archived_image_manifest(archive_path)
except ImageArchiveInvalidException as exc:
failure_logger('Unable to extract manifest summary from archive: %s' % to_native(exc))
return build_msg('overwriting an unreadable archive file')

if archived is None:
return build_msg('since none present')
elif current_image_id == api_image_id(archived.image_id) and [current_image_name] == archived.repo_tags:
return None
else:
name = ', '.join(archived.repo_tags)

return build_msg('overwriting archive with image %s named %s' % (archived.image_id, name))

def archive_image(self, name, tag):
'''
"""
Archive an image to a .tar file. Called when archive_path is passed.
:param name - name of the image. Type: str
:return None
'''
:param name: Name/repository of the image
:type name: str
:param tag: Optional image tag; assumed to be "latest" if None
:type tag: str | None
"""

if not tag:
tag = "latest"
Expand All @@ -549,9 +602,17 @@ def archive_image(self, name, tag):
self.log("archive image: image %s not found" % image_name)
return

self.results['actions'].append('Archived image %s to %s' % (image_name, self.archive_path))
self.results['changed'] = True
if not self.check_mode:
# Will have a 'sha256:' prefix
image_id = image['Id']

action = self.archived_image_action(self.client.module.debug, self.archive_path, image_name, image_id)

if action:
self.results['actions'].append(action)

self.results['changed'] = action is not None

if (not self.check_mode) and self.results['changed']:
self.log("Getting archive of image %s" % image_name)
try:
saved_image = self.client._stream_raw_result(
Expand All @@ -569,8 +630,7 @@ def archive_image(self, name, tag):
except Exception as exc:
self.fail("Error writing image archive %s - %s" % (self.archive_path, to_native(exc)))

if image:
self.results['image'] = image
self.results['image'] = image

def push_image(self, name, tag=None):
'''
Expand Down
60 changes: 59 additions & 1 deletion tests/integration/targets/docker_image/tasks/tests/options.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
set_fact:
iname: "{{ name_prefix ~ '-options' }}"
iname_1: "{{ name_prefix ~ '-options-1' }}"
hello_world_alt: "{{ name_prefix }}-hello-world-alt:v1.2.3-foo"

- name: Registering image name
set_fact:
inames: "{{ inames + [iname, iname_1] }}"
inames: "{{ inames + [iname, iname_1, hello_world_alt] }}"

####################################################################
## build.args ######################################################
Expand Down Expand Up @@ -235,6 +236,63 @@
source: pull
register: archive_image

- assert:
that:
- archive_image is changed

- name: Copy archive because we will mutate it but other tests need the original
copy:
remote_src: true
src: "{{ remote_tmp_dir }}/image.tar"
dest: "{{ remote_tmp_dir }}/image_mutated.tar"

- name: Archive image again (idempotent)
docker_image:
name: "{{ docker_test_image_hello_world }}"
archive_path: "{{ remote_tmp_dir }}/image_mutated.tar"
source: local
register: archive_image_2

- assert:
that:
- archive_image_2 is not changed

- name: Archive image 3rd time, should overwrite due to different id
docker_image:
name: "{{ docker_test_image_alpine_different }}"
archive_path: "{{ remote_tmp_dir }}/image_mutated.tar"
source: pull
register: archive_image_3

- assert:
that:
- archive_image_3 is changed

- name: Reset archive
copy:
remote_src: true
src: "{{ remote_tmp_dir }}/image.tar"
dest: "{{ remote_tmp_dir }}/image_mutated.tar"

- name: Tag image with different name
docker_image:
name: "{{ docker_test_image_hello_world }}"
repository: "{{ hello_world_alt }}"
source: local

- name: Archive image 4th time, should overwrite due to different name even when ID is same
docker_image:
name: "{{ hello_world_alt }}"
# Tagged as docker_test_image_hello_world but has same hash/id (before this task overwrites it)
archive_path: "{{ remote_tmp_dir }}/image_mutated.tar"
source: local
register: archive_image_4

- assert:
that:
- archive_image_4 is changed

# This is the test that needs the original, non-mutated archive
- name: Archive image by ID
docker_image:
name: "{{ archive_image.image.Id }}"
Expand Down
Loading

0 comments on commit b52cd49

Please sign in to comment.