Skip to content

Commit

Permalink
Make image archive/save idempotent, using image id as key
Browse files Browse the repository at this point in the history
  • Loading branch information
iamjpotts committed Nov 24, 2022
1 parent f17e6d5 commit 6ff9156
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,6 @@ dmypy.json

# Pyre type checker
.pyre/

# PyCharm
.idea
3 changes: 3 additions & 0 deletions changelogs/fragments/500-idempotent-image-archival.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
minor_changes:
- docker_image - archive_path - based on image id (hash), if the existing tar archive matches the source, do nothing.
Previously, each task execution re-created the archive.
95 changes: 95 additions & 0 deletions plugins/module_utils/image_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import os

import json
import tarfile

from ansible.module_utils.common.text.converters import to_native


class ImageArchiveInvalidException(Exception):
def __init__(self, message, cause):
super(ImageArchiveInvalidException, self).__init__(message)

# Python 2 doesn't support causes
self.cause = cause


def archived_image_id(archive_path):
"""
Attempts to get Image.Id from meta data stored in the image archive tar file.
The tar should contain a file "manifest.json" with an array with a single entry,
and the entry should have a Config field with the image ID in its file name.
Raises:
ImageArchiveInvalidException if a file already exists at archive_path, but
we could not extract an image ID from it.
Returns:
Either None, if no file exists at archive_path, or the extracted image ID.
The extracted ID will not have a sha256: prefix.
:return str
"""

try:
# FileNotFoundError does not exist in Python 2
if not os.path.isfile(archive_path):
return None

tf = tarfile.open(archive_path, 'r')
try:
try:
ef = tf.extractfile('manifest.json')
try:
text = ef.read().decode('utf-8')
manifest = json.loads(text)
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to decode and deserialize manifest.json: %s" % to_native(exc),
exc
)
finally:
# In Python 2.6, this does not have __exit__
ef.close()

try:
config_file = manifest[0]['Config']
except KeyError as exc:
raise ImageArchiveInvalidException(
"Failed to get Config entry from manifest.json: %s" % to_native(exc),
exc
)

# Returns hash without 'sha256:' prefix
try:
# Strip off .json filename extension, leaving just the hash.
return os.path.splitext(config_file)[0]
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to extract image id from config file name %s: %s" % (config_file, to_native(exc)),
exc
)
except ImageArchiveInvalidException:
raise
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to extract manifest.json from tar file %s: %s" % (archive_path, to_native(exc)),
exc
)

finally:
# In Python 2.6, TarFile does not have __exit__
tf.close()

except ImageArchiveInvalidException:
raise
except Exception as exc:
raise ImageArchiveInvalidException("Failed to open tar file %s: %s" % (archive_path, to_native(exc)), exc)
32 changes: 28 additions & 4 deletions plugins/modules/docker_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@

import errno
import json
import logging
import os
import traceback

Expand All @@ -343,6 +344,10 @@
AnsibleDockerClient,
RequestException,
)
from ansible_collections.community.docker.plugins.module_utils.image_archive import (
archived_image_id,
ImageArchiveInvalidException
)
from ansible_collections.community.docker.plugins.module_utils.util import (
clean_dict_booleans_for_docker_api,
DockerBaseClass,
Expand Down Expand Up @@ -370,6 +375,9 @@
)


log = logging.getLogger(__name__)


class ImageManager(DockerBaseClass):

def __init__(self, client, results):
Expand Down Expand Up @@ -549,9 +557,26 @@ def archive_image(self, name, tag):
self.log("archive image: image %s not found" % image_name)
return

# Will have a 'sha256:' prefix
new_image_id = image['Id']

try:
archived_id = archived_image_id(self.archive_path)

if archived_id:
log.debug('Previous image ID was %s', archived_id)
else:
log.debug('No previous image')
except ImageArchiveInvalidException:
log.debug('Previous image ID not available', exc_info=True)
archived_id = None

changed = (new_image_id != ('sha256:%s' % archived_id))

self.results['actions'].append('Archived image %s to %s' % (image_name, self.archive_path))
self.results['changed'] = True
if not self.check_mode:
self.results['changed'] = changed

if (not self.check_mode) and changed:
self.log("Getting archive of image %s" % image_name)
try:
saved_image = self.client._stream_raw_result(
Expand All @@ -569,8 +594,7 @@ def archive_image(self, name, tag):
except Exception as exc:
self.fail("Error writing image archive %s - %s" % (self.archive_path, to_native(exc)))

if image:
self.results['image'] = image
self.results['image'] = image

def push_image(self, name, tag=None):
'''
Expand Down
15 changes: 15 additions & 0 deletions tests/integration/targets/docker_image/tasks/tests/options.yml
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,21 @@
source: pull
register: archive_image

- assert:
that:
- archive_image is changed

- name: Archive image again (idempotent)
docker_image:
name: "{{ docker_test_image_hello_world }}"
archive_path: "{{ remote_tmp_dir }}/image.tar"
source: pull
register: archive_image

- assert:
that:
- archive_image is not changed

- name: Archive image by ID
docker_image:
name: "{{ archive_image.image.Id }}"
Expand Down
150 changes: 150 additions & 0 deletions tests/unit/plugins/module_utils/test_image_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import json
import os.path
import tarfile
import tempfile

from ansible_collections.community.docker.plugins.module_utils.image_archive import (
archived_image_id,
ImageArchiveInvalidException
)


def write_imitation_archive(file_name, image_id):
"""
Write a tar file meeting these requirements:
* Has a file manifest.json
* manifest.json contains a one-element array
* The element has a Config property with "[image_id].json" as the value name
Args:
file_name: real file to write
image_id: fake sha256 hash (without the sha256: prefix)
"""

manifest = [
{
'Config': '%s.json' % image_id
}
]

write_imitation_archive_with_manifest(file_name, manifest)


def write_imitation_archive_with_manifest(file_name, manifest):
manifest_file_name = tempfile.mkstemp(suffix='json')[1]

with open(manifest_file_name, 'w') as f:
f.write(json.dumps(manifest))

try:
tf = tarfile.open(file_name, 'w')
try:
ti = tarfile.TarInfo('manifest.json')
ti.size = os.path.getsize(manifest_file_name)

with open(manifest_file_name, 'rb') as f:
tf.addfile(ti, f)

finally:
# In Python 2.6, this does not have __exit__
tf.close()

finally:
os.remove(manifest_file_name)


def write_irrelevant_tar(file_name):
inner_file_name = tempfile.mkstemp(suffix='txt')[1]

with open(inner_file_name, 'w') as f:
f.write('Hello, world.')

tf = tarfile.open(file_name, 'w')
try:
ti = tarfile.TarInfo('hi.txt')
ti.size = os.path.getsize(inner_file_name)

with open(inner_file_name, 'rb') as f:
tf.addfile(ti, f)

finally:
tf.close()


def test_archived_image_id_extracts():
expected_id = "abcde12345"

file_name = tempfile.mkstemp(suffix='.tar')[1]

write_imitation_archive(file_name, expected_id)

actual_id = archived_image_id(file_name)

assert actual_id == expected_id

os.remove(file_name)


def test_archived_image_id_extracts_nothing_when_file_not_present():
full_name = os.path.join(os.path.split(__file__)[0], 'does-not-exist.tar')

image_id = archived_image_id(full_name)

assert image_id is None


def test_archived_image_id_raises_when_file_not_a_tar():
try:
archived_image_id(__file__)
raise AssertionError()
except ImageArchiveInvalidException as e:
assert isinstance(e.cause, tarfile.ReadError)
assert str(__file__) in str(e)


def test_archived_image_id_raises_when_tar_missing_manifest():
file_name = tempfile.mkstemp(suffix='tar')[1]

write_irrelevant_tar(file_name)

try:
archived_image_id(file_name)
raise AssertionError()
except ImageArchiveInvalidException as e:
print('Cause type is %s' % str(type(e.cause)))
print('Cause is %r', e.cause)
print('Cause message is %s', e.cause)
assert isinstance(e.cause, KeyError)
assert 'manifest.json' in str(e.cause)

os.remove(file_name)


def test_archived_image_id_raises_when_manifest_missing_id():
manifest = [
{
'foo': 'bar'
}
]

file_name = tempfile.mkstemp(suffix='.tar')[1]

write_imitation_archive_with_manifest(file_name, manifest)

try:
archived_image_id(file_name)
raise AssertionError()
except ImageArchiveInvalidException as e:
assert isinstance(e.cause, KeyError)
assert 'Config' in str(e.cause)

os.remove(file_name)

0 comments on commit 6ff9156

Please sign in to comment.