From baac25cbd8361ddbae0e7cbed674e2832372e5fc Mon Sep 17 00:00:00 2001 From: Brian Scholer <1260690+briantist@users.noreply.github.com> Date: Sun, 8 Oct 2023 16:52:05 -0400 Subject: [PATCH] add configurable support for both raw and base64 uploads (#109) * add configurable support for both raw and base64 uploads * add tests for IncomingCollectionStream * add changelog fragment * update readme --- README.md | 7 +++- changelogs/fragments/109-upload-format.yml | 6 +++ galactory/__init__.py | 2 + galactory/api/v2/collections.py | 5 ++- galactory/api/v3/collections.py | 5 ++- galactory/utilities.py | 22 ++++++++++ tests/unit/conftest.py | 7 ++-- .../test_incoming_collection_stream.py | 40 +++++++++++++++++++ 8 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 changelogs/fragments/109-upload-format.yml create mode 100644 tests/unit/utilities/test_incoming_collection_stream.py diff --git a/README.md b/README.md index f6d95aa..d31e685 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ usage: python -m galactory [-h] [-c CONFIG] [--listen-addr LISTEN_ADDR] [--cache-read CACHE_READ] [--cache-write CACHE_WRITE] [--use-property-fallback] [--health-check-custom-text HEALTH_CHECK_CUSTOM_TEXT] - [--api-version {v2,v3}] + [--api-version {v2,v3}] [--upload-format {base64,raw,auto}] galactory is a partial Ansible Galaxy proxy that uploads and downloads collections, using an Artifactory generic repository as its backend. @@ -138,6 +138,11 @@ optional arguments: The API versions to serve. Can be set to limit functionality to specific versions only. Defaults to all supported versions. [env var: GALACTORY_API_VERSION] + --upload-format {base64,raw,auto} + Galaxy accepts the uploaded collection tarball as either raw bytes or base64 encoded. + Ansible 2.9 uploads raw bytes, later versions upload base64. By default galactory will + try to auto-detect. Use this option to turn off auto-detection and force a specific format. + [env var: GALACTORY_UPLOAD_FORMAT] Args that start with '--' (eg. --listen-addr) can also be set in a config file (/etc/galactory.d/*.conf or ~/.galactory/*.conf or specified via -c). Config file syntax allows: diff --git a/changelogs/fragments/109-upload-format.yml b/changelogs/fragments/109-upload-format.yml new file mode 100644 index 0000000..e10d8f1 --- /dev/null +++ b/changelogs/fragments/109-upload-format.yml @@ -0,0 +1,6 @@ +--- +bugfixes: + - The collection publish endpoint required the file data to be base64 encoded. This worked for ``ansible-core>=2.10`` but did not work with Ansible 2.9 or other clients that were not aware of the need. Galactory can now detect and accept both raw bytes and base64 encoded content (https://github.com/briantist/galactory/issues/105). + +minor_changes: + - The option ``UPLOAD_FORMAT`` has been added to control the behavior of upload format detection. Auto-detection is attempted when the option is not set or set to ``auto``. Set it to ``base64`` to only accept base64-encoded content, or set it to ``raw`` to only accept unencoded content (https://github.com/briantist/galactory/pull/109). diff --git a/galactory/__init__.py b/galactory/__init__.py index 79b7167..d76a932 100644 --- a/galactory/__init__.py +++ b/galactory/__init__.py @@ -100,6 +100,7 @@ def create_configured_app(run=False, parse_known_only=True, parse_allow_abbrev=F parser.add_argument('--use-property-fallback', action='store_true', env_var='GALACTORY_USE_PROPERTY_FALLBACK', help='Set properties of an uploaded collection in a separate request after publshinng. Requires a Pro license of Artifactory. This feature is a workaround for an Artifactory proxy configuration error and may be removed in a future version.') parser.add_argument('--health-check-custom-text', type=str, default='', env_var='GALACTORY_HEALTH_CHECK_CUSTOM_TEXT', help='Sets custom_text field for health check endpoint responses.') parser.add_argument('--api-version', action='append', choices=['v2', 'v3'], env_var='GALACTORY_API_VERSION', help='The API versions to serve. Can be set to limit functionality to specific versions only. Defaults to all supported versions.') + parser.add_argument('--upload-format', type=str, env_var='GALACTORY_UPLOAD_FORMAT', choices=['base64', 'raw', 'auto'], default='auto', help='Galaxy accepts the uploaded collection tarball as either raw bytes or base64 encoded. Ansible 2.9 uploads raw bytes, later versions upload base64. By default galactory will try to auto-detect. Use this option to turn off auto-detection and force a specific format.') if parse_known_only: args, _ = parser.parse_known_args() @@ -177,6 +178,7 @@ def create_configured_app(run=False, parse_known_only=True, parse_allow_abbrev=F USE_PROPERTY_FALLBACK=args.use_property_fallback, HEALTH_CHECK_CUSTOM_TEXT=args.health_check_custom_text, API_VERSION=args.api_version, + UPLOAD_FORMAT=args.upload_format, ) if proxy_fix: diff --git a/galactory/api/v2/collections.py b/galactory/api/v2/collections.py index 3aeadfa..e6afe64 100644 --- a/galactory/api/v2/collections.py +++ b/galactory/api/v2/collections.py @@ -2,7 +2,6 @@ # (c) 2022 Brian Scholer (@briantist) from semver import VersionInfo -from base64io import Base64IO from flask import Response, jsonify, abort, url_for, request, current_app from . import bp as v2 @@ -12,6 +11,7 @@ authorize, _chunk_to_temp, upload_collection_from_hashed_tempfile, + IncomingCollectionStream, ) from ...upstream import ProxyUpstream from ...models import CollectionCollection @@ -287,11 +287,12 @@ def publish(): file = request.files['file'] skip_configured_auth = current_app.config['PUBLISH_SKIP_CONFIGURED_AUTH'] property_fallback = current_app.config.get('USE_PROPERTY_FALLBACK', False) + upload_format = current_app.config.get('UPLOAD_FORMAT') _scheme = current_app.config.get('PREFERRED_URL_SCHEME') target = authorize(request, current_app.config['ARTIFACTORY_PATH'] / file.filename, skip_configured_auth=skip_configured_auth) - with _chunk_to_temp(Base64IO(file)) as tmp: + with _chunk_to_temp(IncomingCollectionStream(file, format=upload_format)) as tmp: if tmp.sha256 != sha256: abort(Response(f"Hash mismatch: uploaded=='{sha256}', calculated=='{tmp.sha256}'", C.HTTP_INTERNAL_SERVER_ERROR)) diff --git a/galactory/api/v3/collections.py b/galactory/api/v3/collections.py index 4fbc5b7..524a717 100644 --- a/galactory/api/v3/collections.py +++ b/galactory/api/v3/collections.py @@ -2,7 +2,6 @@ # (c) 2023 Brian Scholer (@briantist) from semver import VersionInfo -from base64io import Base64IO from flask import Response, jsonify, abort, url_for, request, current_app from . import bp as v3 @@ -12,6 +11,7 @@ authorize, _chunk_to_temp, upload_collection_from_hashed_tempfile, + IncomingCollectionStream, ) from ...upstream import ProxyUpstream from ...models import CollectionCollection @@ -328,11 +328,12 @@ def publish(): file = request.files['file'] skip_configured_auth = current_app.config['PUBLISH_SKIP_CONFIGURED_AUTH'] property_fallback = current_app.config.get('USE_PROPERTY_FALLBACK', False) + upload_format = current_app.config.get('UPLOAD_FORMAT') _scheme = current_app.config.get('PREFERRED_URL_SCHEME') target = authorize(request, current_app.config['ARTIFACTORY_PATH'] / file.filename, skip_configured_auth=skip_configured_auth) - with _chunk_to_temp(Base64IO(file)) as tmp: + with _chunk_to_temp(IncomingCollectionStream(file, format=upload_format)) as tmp: if tmp.sha256 != sha256: abort(Response(f"Hash mismatch: uploaded=='{sha256}', calculated=='{tmp.sha256}'", C.HTTP_INTERNAL_SERVER_ERROR)) diff --git a/galactory/utilities.py b/galactory/utilities.py index 1e52a57..efb9d46 100644 --- a/galactory/utilities.py +++ b/galactory/utilities.py @@ -13,6 +13,7 @@ from urllib3 import Retry from requests.adapters import HTTPAdapter from requests import Session +from base64io import Base64IO from flask import current_app, abort, Response, Request from flask.json.provider import DefaultJSONProvider @@ -133,6 +134,27 @@ def lcm(a, b, *more): return abs(a * z) // math.gcd(a, z) +class IncomingCollectionStream: + def __new__(cls, stream: t.IO, *, format: str = None): + if format == 'raw': + return stream + if format == 'base64': + return Base64IO(stream) + return cls.detected_stream(stream) + + @staticmethod + def detected_stream(stream: t.IO): + with gzip.GzipFile(fileobj=stream, mode='rb') as gz: + try: + gz.read(1) + except gzip.BadGzipFile: + return Base64IO(stream) + else: + return stream + finally: + stream.seek(0) + + class HashedTempFile(): def __init__(self, handle, md5, sha1, sha256, close=True) -> None: self.handle = handle diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 0ba1086..352462f 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -5,6 +5,7 @@ import json import sys +from pathlib import Path from unittest import mock from shutil import copytree from artifactory import _ArtifactoryAccessor, _FakePathTemplate, ArtifactoryPath @@ -32,14 +33,14 @@ def client(app): @pytest.fixture -def virtual_fs_repo(fixture_finder, tmp_path): +def virtual_fs_repo(fixture_finder, tmp_path: Path): repo = tmp_path / 'repo' copytree(fixture_finder('artifactory', 'virtual'), repo) return repo @pytest.fixture -def mock_artifactory_accessor(fixture_loader, virtual_fs_repo): +def mock_artifactory_accessor(fixture_loader, virtual_fs_repo: Path): class MockArtifactoryAccessor(_ArtifactoryAccessor): def __init__(self) -> None: super().__init__() @@ -68,7 +69,7 @@ def get_stat_json(self, pathobj, key=None): @pytest.fixture -def mock_artifactory_path(mock_artifactory_accessor, virtual_fs_repo): +def mock_artifactory_path(mock_artifactory_accessor, virtual_fs_repo: Path): _artifactory_accessor = mock_artifactory_accessor() class MockArtifactoryPath(ArtifactoryPath): diff --git a/tests/unit/utilities/test_incoming_collection_stream.py b/tests/unit/utilities/test_incoming_collection_stream.py new file mode 100644 index 0000000..f6076f2 --- /dev/null +++ b/tests/unit/utilities/test_incoming_collection_stream.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# (c) 2023 Brian Scholer (@briantist) + +import pytest +import tarfile + +from pathlib import Path +from base64io import Base64IO +from galactory.utilities import IncomingCollectionStream + + +@pytest.fixture +def collection_tarball(virtual_fs_repo: Path, tmp_path: Path): + collection = next(virtual_fs_repo.glob("**/*.tar.gz")) + gz_path = tmp_path / collection.name + with tarfile.open(gz_path, mode='w:gz') as tar: + tar.add(collection) + return gz_path + + +@pytest.fixture +def base64_tarball(collection_tarball: Path, tmp_path: Path): + b64_path = tmp_path / f"{collection_tarball.name}.b64" + with open(collection_tarball, mode='rb') as raw, open(b64_path, mode='wb') as w, Base64IO(w) as f: + f.write(raw.read()) + return b64_path + + +class TestIncomingCollectionStream: + @pytest.mark.parametrize('format', [None, 'auto', 'undefined', 'raw']) + def test_raw(self, collection_tarball: Path, format: str): + with open(collection_tarball, mode='rb') as f: + assert IncomingCollectionStream.detected_stream(f) is f + assert IncomingCollectionStream(f, format=format) + + @pytest.mark.parametrize('format', [None, 'auto', 'undefined', 'base64']) + def test_base64(self, base64_tarball: Path, format: str): + with open(base64_tarball, mode='rb') as f: + assert isinstance(IncomingCollectionStream.detected_stream(f), Base64IO) + assert isinstance(IncomingCollectionStream(f, format=format), Base64IO)