From 015571a22d5ddf5da6b1a845839c477cec53f91c Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Sun, 30 Apr 2023 09:21:29 +0200 Subject: [PATCH 01/10] Normalize the collection cache filename so we don't depend on Galaxy servers always using the same method. --- changelogs/fragments/78-collection-cache.yml | 2 ++ src/antsibull_core/galaxy.py | 22 +++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) create mode 100644 changelogs/fragments/78-collection-cache.yml diff --git a/changelogs/fragments/78-collection-cache.yml b/changelogs/fragments/78-collection-cache.yml new file mode 100644 index 0000000..6d7d1c0 --- /dev/null +++ b/changelogs/fragments/78-collection-cache.yml @@ -0,0 +1,2 @@ +minor_changes: + - "Avoid using the collection artifact filename returned by the Galaxy server. Instead compose it in a uniform way (https://github.com/ansible-community/antsibull-core/pull/78)." diff --git a/src/antsibull_core/galaxy.py b/src/antsibull_core/galaxy.py index c750cfb..39676ed 100644 --- a/src/antsibull_core/galaxy.py +++ b/src/antsibull_core/galaxy.py @@ -419,22 +419,18 @@ async def download( :arg version: Version of the collection to download. :returns: The full path to the downloaded collection. """ - collection = collection.replace(".", "/") - release_info = await self.get_release_info(collection, version) + namespace, name = collection.split(".", 1) + filename = f"{namespace}-{name}-{version}.tar.gz" + + release_info = await self.get_release_info(f"{namespace}/{name}", version) release_url = release_info["download_url"] - download_filename = os.path.join( - self.download_dir, release_info["artifact"]["filename"] - ) + download_filename = os.path.join(self.download_dir, filename) sha256sum = release_info["artifact"]["sha256"] if self.collection_cache: - if release_info["artifact"]["filename"] in os.listdir( - self.collection_cache - ): - cached_copy = os.path.join( - self.collection_cache, release_info["artifact"]["filename"] - ) + if filename in os.listdir(self.collection_cache): + cached_copy = os.path.join(self.collection_cache, filename) if await verify_hash(cached_copy, sha256sum): shutil.copyfile(cached_copy, download_filename) return download_filename @@ -459,9 +455,7 @@ async def download( # Copy downloaded collection into cache if self.collection_cache: - cached_copy = os.path.join( - self.collection_cache, release_info["artifact"]["filename"] - ) + cached_copy = os.path.join(self.collection_cache, filename) shutil.copyfile(download_filename, cached_copy) return download_filename From e6cda289bab664848c3e9a3c4b68570b41798176 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Sun, 30 Apr 2023 09:33:08 +0200 Subject: [PATCH 02/10] Allow to trust collection cache. --- changelogs/fragments/78-collection-cache.yml | 1 + src/antsibull_core/galaxy.py | 16 ++++++++++++++++ src/antsibull_core/schemas/context.py | 11 ++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/changelogs/fragments/78-collection-cache.yml b/changelogs/fragments/78-collection-cache.yml index 6d7d1c0..3d789c5 100644 --- a/changelogs/fragments/78-collection-cache.yml +++ b/changelogs/fragments/78-collection-cache.yml @@ -1,2 +1,3 @@ minor_changes: - "Avoid using the collection artifact filename returned by the Galaxy server. Instead compose it in a uniform way (https://github.com/ansible-community/antsibull-core/pull/78)." + - "Allow the Galaxy downloader to trust its collection cache to avoid having to query the Galaxy server if an artifact exists in the cache. This can be set with the new configuration file option ``trust_collection_cache`` (https://github.com/ansible-community/antsibull-core/pull/78)." diff --git a/src/antsibull_core/galaxy.py b/src/antsibull_core/galaxy.py index 39676ed..28ce0aa 100644 --- a/src/antsibull_core/galaxy.py +++ b/src/antsibull_core/galaxy.py @@ -384,6 +384,7 @@ def __init__( galaxy_server: t.Optional[str] = None, collection_cache: str | None = None, context: t.Optional[GalaxyContext] = None, + trust_collection_cache: t.Optional[bool] = None, ) -> None: """ Create an object to download collections from galaxy. @@ -398,12 +399,19 @@ def __init__( These tarballs will be used instead of downloading new tarballs provided that the versions match the criteria (latest compatible version known to galaxy). Defaults to ``lib_ctx.get().collection_cache``. + :kwarg trust_collection_cache: If set to ``True``, will assume that if the collection + cache contains an artifact, it is the latest one available on the Galaxy server. + This avoids making a request to the Galaxy server to figure out the artifact's + checksum and comparting it before trusting the cached artifact. """ super().__init__(aio_session, galaxy_server=galaxy_server, context=context) self.download_dir = download_dir if collection_cache is None: collection_cache = app_context.lib_ctx.get().collection_cache self.collection_cache: t.Final[str | None] = collection_cache + if trust_collection_cache is None: + trust_collection_cache = app_context.lib_ctx.get().trust_collection_cache + self.trust_collection_cache = trust_collection_cache async def download( self, @@ -422,6 +430,14 @@ async def download( namespace, name = collection.split(".", 1) filename = f"{namespace}-{name}-{version}.tar.gz" + if self.collection_cache and self.trust_collection_cache: + filename = f"{namespace}-{name}-{version}.tar.gz" + if filename in os.listdir(self.collection_cache): + cached_copy = os.path.join(self.collection_cache, filename) + download_filename = os.path.join(self.download_dir, filename) + shutil.copyfile(cached_copy, download_filename) + return download_filename + release_info = await self.get_release_info(f"{namespace}/{name}", version) release_url = release_info["download_url"] diff --git a/src/antsibull_core/schemas/context.py b/src/antsibull_core/schemas/context.py index 6fbbecb..9e4e9c0 100644 --- a/src/antsibull_core/schemas/context.py +++ b/src/antsibull_core/schemas/context.py @@ -11,7 +11,7 @@ from ..utils.collections import ContextDict from .config import DEFAULT_LOGGING_CONFIG, LoggingModel -from .validators import convert_none, convert_path +from .validators import convert_bool, convert_none, convert_path class BaseModel(p.BaseModel): @@ -111,6 +111,10 @@ class LibContext(BaseModel): :ivar pypi_url: URL of the pypi server to query for information :ivar collection_cache: If set, must be a path pointing to a directory where collection tarballs are cached so they do not need to be downloaded from Galaxy twice. + :ivar trust_collection_cache: If set to ``True``, will assume that if the collection + cache contains an artifact, it is the latest one available on the Galaxy server. + This avoids making a request to the Galaxy server to figure out the artifact's + checksum and comparting it before trusting the cached artifact. """ chunksize: int = 4096 @@ -131,6 +135,7 @@ class LibContext(BaseModel): # pyre-ignore[8]: https://github.com/samuelcolvin/pydantic/issues/1684 pypi_url: p.HttpUrl = "https://pypi.org/" # type: ignore[assignment] collection_cache: t.Optional[str] = None + trust_collection_cache: bool = False # pylint: disable-next=unused-private-member __convert_nones = p.validator("process_max", pre=True, allow_reuse=True)( @@ -140,3 +145,7 @@ class LibContext(BaseModel): __convert_paths = p.validator("collection_cache", pre=True, allow_reuse=True)( convert_path ) + # pylint: disable-next=unused-private-member + __convert_bools = p.validator("trust_collection_cache", pre=True, allow_reuse=True)( + convert_bool + ) From 39bd6d6965dd8bc8cdb85105b62ed32689b5acd9 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Mon, 1 May 2023 08:25:27 +0200 Subject: [PATCH 03/10] Improve docs, add to sample config. --- antsibull.cfg | 6 ++++++ src/antsibull_core/galaxy.py | 2 +- src/antsibull_core/schemas/context.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/antsibull.cfg b/antsibull.cfg index 685a919..b7f1c19 100644 --- a/antsibull.cfg +++ b/antsibull.cfg @@ -23,6 +23,12 @@ file_check_content = 262144 # to cache downloaded collection artefacts from Ansible Galaxy in that directory # and speed up the docs build time: # collection_cache = ~/.cache/antsibull/collections +# Uncomment the following to trust the collection cache. This means that ansible-core +# will assume that if the collection cache contains an artifact, it is the current one +# available on the Galaxy server. This avoids making a request to the Galaxy server to +# figure out the artifact's checksum and comparting it before trusting the cached +# artifact. +# trust_collection_cache = true logging_cfg = { version = 1.0 outputs = { diff --git a/src/antsibull_core/galaxy.py b/src/antsibull_core/galaxy.py index 28ce0aa..baf9e8c 100644 --- a/src/antsibull_core/galaxy.py +++ b/src/antsibull_core/galaxy.py @@ -400,7 +400,7 @@ def __init__( versions match the criteria (latest compatible version known to galaxy). Defaults to ``lib_ctx.get().collection_cache``. :kwarg trust_collection_cache: If set to ``True``, will assume that if the collection - cache contains an artifact, it is the latest one available on the Galaxy server. + cache contains an artifact, it is the current one available on the Galaxy server. This avoids making a request to the Galaxy server to figure out the artifact's checksum and comparting it before trusting the cached artifact. """ diff --git a/src/antsibull_core/schemas/context.py b/src/antsibull_core/schemas/context.py index 9e4e9c0..2e3f05c 100644 --- a/src/antsibull_core/schemas/context.py +++ b/src/antsibull_core/schemas/context.py @@ -112,7 +112,7 @@ class LibContext(BaseModel): :ivar collection_cache: If set, must be a path pointing to a directory where collection tarballs are cached so they do not need to be downloaded from Galaxy twice. :ivar trust_collection_cache: If set to ``True``, will assume that if the collection - cache contains an artifact, it is the latest one available on the Galaxy server. + cache contains an artifact, it is the current one available on the Galaxy server. This avoids making a request to the Galaxy server to figure out the artifact's checksum and comparting it before trusting the cached artifact. """ From e6395f334c9976364b2d8fd7ef996749727a31f6 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Mon, 1 May 2023 08:25:44 +0200 Subject: [PATCH 04/10] Follow-up to previous PR. --- antsibull.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/antsibull.cfg b/antsibull.cfg index b7f1c19..4df5363 100644 --- a/antsibull.cfg +++ b/antsibull.cfg @@ -2,7 +2,7 @@ # GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) # SPDX-License-Identifier: GPL-3.0-or-later -ansible_base_url = https://github.com/ansible/ansible/ +ansible_core_repo_url = https://github.com/ansible/ansible/ # Number of bytes to read or write at one time for network or file I/O: chunksize = 4096 galaxy_url = https://galaxy.ansible.com/ From 5d32b815d6496d185417fd99b86d90024d0492d5 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Mon, 1 May 2023 09:21:31 +0200 Subject: [PATCH 05/10] Fix typo. --- antsibull.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/antsibull.cfg b/antsibull.cfg index 4df5363..4c5d8f9 100644 --- a/antsibull.cfg +++ b/antsibull.cfg @@ -23,7 +23,7 @@ file_check_content = 262144 # to cache downloaded collection artefacts from Ansible Galaxy in that directory # and speed up the docs build time: # collection_cache = ~/.cache/antsibull/collections -# Uncomment the following to trust the collection cache. This means that ansible-core +# Uncomment the following to trust the collection cache. This means that antsibull-core # will assume that if the collection cache contains an artifact, it is the current one # available on the Galaxy server. This avoids making a request to the Galaxy server to # figure out the artifact's checksum and comparting it before trusting the cached From 7be638b7557c8a264f76a648b010f57e10062c47 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Tue, 2 May 2023 09:25:33 +0200 Subject: [PATCH 06/10] Improvements from review. --- antsibull.cfg | 2 +- src/antsibull_core/galaxy.py | 19 +++++++++---------- src/antsibull_core/schemas/context.py | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/antsibull.cfg b/antsibull.cfg index 4c5d8f9..57f039f 100644 --- a/antsibull.cfg +++ b/antsibull.cfg @@ -26,7 +26,7 @@ file_check_content = 262144 # Uncomment the following to trust the collection cache. This means that antsibull-core # will assume that if the collection cache contains an artifact, it is the current one # available on the Galaxy server. This avoids making a request to the Galaxy server to -# figure out the artifact's checksum and comparting it before trusting the cached +# figure out the artifact's checksum and comparing it before trusting the cached # artifact. # trust_collection_cache = true logging_cfg = { diff --git a/src/antsibull_core/galaxy.py b/src/antsibull_core/galaxy.py index baf9e8c..15a5143 100644 --- a/src/antsibull_core/galaxy.py +++ b/src/antsibull_core/galaxy.py @@ -406,12 +406,13 @@ def __init__( """ super().__init__(aio_session, galaxy_server=galaxy_server, context=context) self.download_dir = download_dir + lib_ctx = app_context.lib_ctx.get() if collection_cache is None: - collection_cache = app_context.lib_ctx.get().collection_cache + collection_cache = lib_ctx.collection_cache self.collection_cache: t.Final[str | None] = collection_cache if trust_collection_cache is None: - trust_collection_cache = app_context.lib_ctx.get().trust_collection_cache - self.trust_collection_cache = trust_collection_cache + trust_collection_cache = lib_ctx.trust_collection_cache + self.trust_collection_cache: t.Final[bool] = trust_collection_cache async def download( self, @@ -429,24 +430,22 @@ async def download( """ namespace, name = collection.split(".", 1) filename = f"{namespace}-{name}-{version}.tar.gz" + download_filename = os.path.join(self.download_dir, filename) if self.collection_cache and self.trust_collection_cache: - filename = f"{namespace}-{name}-{version}.tar.gz" - if filename in os.listdir(self.collection_cache): - cached_copy = os.path.join(self.collection_cache, filename) - download_filename = os.path.join(self.download_dir, filename) + cached_copy = os.path.join(self.collection_cache, filename) + if os.path.isfile(cached_copy): shutil.copyfile(cached_copy, download_filename) return download_filename release_info = await self.get_release_info(f"{namespace}/{name}", version) release_url = release_info["download_url"] - download_filename = os.path.join(self.download_dir, filename) sha256sum = release_info["artifact"]["sha256"] if self.collection_cache: - if filename in os.listdir(self.collection_cache): - cached_copy = os.path.join(self.collection_cache, filename) + cached_copy = os.path.join(self.collection_cache, filename) + if os.path.isfile(cached_copy): if await verify_hash(cached_copy, sha256sum): shutil.copyfile(cached_copy, download_filename) return download_filename diff --git a/src/antsibull_core/schemas/context.py b/src/antsibull_core/schemas/context.py index 2e3f05c..acfb660 100644 --- a/src/antsibull_core/schemas/context.py +++ b/src/antsibull_core/schemas/context.py @@ -114,7 +114,7 @@ class LibContext(BaseModel): :ivar trust_collection_cache: If set to ``True``, will assume that if the collection cache contains an artifact, it is the current one available on the Galaxy server. This avoids making a request to the Galaxy server to figure out the artifact's - checksum and comparting it before trusting the cached artifact. + checksum and comparing it before trusting the cached artifact. """ chunksize: int = 4096 From 8f8ac0128035181aaa04d892530d85c31d4a24a0 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Tue, 2 May 2023 09:25:49 +0200 Subject: [PATCH 07/10] Use antsibull_core.utils.io.copy_file instead of shutil.copyfile. --- src/antsibull_core/galaxy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/antsibull_core/galaxy.py b/src/antsibull_core/galaxy.py index 15a5143..8bf7e1d 100644 --- a/src/antsibull_core/galaxy.py +++ b/src/antsibull_core/galaxy.py @@ -10,7 +10,6 @@ import asyncio import dataclasses import os.path -import shutil import typing as t from enum import Enum from urllib.parse import urljoin @@ -21,6 +20,7 @@ from . import app_context from .utils.hashing import verify_hash from .utils.http import retry_get +from .utils.io import copy_file # The type checker can handle finding aiohttp.client but flake8 cannot :-( if t.TYPE_CHECKING: @@ -435,7 +435,7 @@ async def download( if self.collection_cache and self.trust_collection_cache: cached_copy = os.path.join(self.collection_cache, filename) if os.path.isfile(cached_copy): - shutil.copyfile(cached_copy, download_filename) + await copy_file(cached_copy, download_filename) return download_filename release_info = await self.get_release_info(f"{namespace}/{name}", version) @@ -447,7 +447,7 @@ async def download( cached_copy = os.path.join(self.collection_cache, filename) if os.path.isfile(cached_copy): if await verify_hash(cached_copy, sha256sum): - shutil.copyfile(cached_copy, download_filename) + await copy_file(cached_copy, download_filename) return download_filename async with retry_get( @@ -471,7 +471,7 @@ async def download( # Copy downloaded collection into cache if self.collection_cache: cached_copy = os.path.join(self.collection_cache, filename) - shutil.copyfile(download_filename, cached_copy) + await copy_file(download_filename, cached_copy) return download_filename From 0dc2a04d2d6b5363e1c4078d015351dad2707092 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Tue, 2 May 2023 09:28:53 +0200 Subject: [PATCH 08/10] Skip check part of copying. --- src/antsibull_core/galaxy.py | 6 +++--- src/antsibull_core/utils/io.py | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/antsibull_core/galaxy.py b/src/antsibull_core/galaxy.py index 8bf7e1d..7fb852e 100644 --- a/src/antsibull_core/galaxy.py +++ b/src/antsibull_core/galaxy.py @@ -435,7 +435,7 @@ async def download( if self.collection_cache and self.trust_collection_cache: cached_copy = os.path.join(self.collection_cache, filename) if os.path.isfile(cached_copy): - await copy_file(cached_copy, download_filename) + await copy_file(cached_copy, download_filename, check_content=False) return download_filename release_info = await self.get_release_info(f"{namespace}/{name}", version) @@ -447,7 +447,7 @@ async def download( cached_copy = os.path.join(self.collection_cache, filename) if os.path.isfile(cached_copy): if await verify_hash(cached_copy, sha256sum): - await copy_file(cached_copy, download_filename) + await copy_file(cached_copy, download_filename, check_content=False) return download_filename async with retry_get( @@ -471,7 +471,7 @@ async def download( # Copy downloaded collection into cache if self.collection_cache: cached_copy = os.path.join(self.collection_cache, filename) - await copy_file(download_filename, cached_copy) + await copy_file(download_filename, cached_copy, check_content=False) return download_filename diff --git a/src/antsibull_core/utils/io.py b/src/antsibull_core/utils/io.py index 96f067a..277ddc2 100644 --- a/src/antsibull_core/utils/io.py +++ b/src/antsibull_core/utils/io.py @@ -22,18 +22,22 @@ mlog = log.fields(mod=__name__) -async def copy_file(source_path: StrOrBytesPath, dest_path: StrOrBytesPath) -> None: +async def copy_file(source_path: StrOrBytesPath, dest_path: StrOrBytesPath, + check_content: bool = True) -> None: """ Copy content from one file to another. :arg source_path: Source path. Must be a file. :arg dest_path: Destination path. + :kwarg check_content: If ``True`` (default) and ``lib_ctx.file_check_content > 0`` and the + destination file exists, first check whether source and destination are potentially equal + before actually copying, """ flog = mlog.fields(func="copy_file") flog.debug("Enter") lib_ctx = app_context.lib_ctx.get() - if lib_ctx.file_check_content > 0: + if check_content and lib_ctx.file_check_content > 0: # Check whether the destination file exists and has the same content as the source file, # in which case we won't overwrite the destination file try: From 83040d54f0706203b853d9b80b72a5ee6e18ef35 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Tue, 2 May 2023 09:29:17 +0200 Subject: [PATCH 09/10] Reformat. --- src/antsibull_core/utils/io.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/antsibull_core/utils/io.py b/src/antsibull_core/utils/io.py index 277ddc2..ac7cf48 100644 --- a/src/antsibull_core/utils/io.py +++ b/src/antsibull_core/utils/io.py @@ -22,8 +22,9 @@ mlog = log.fields(mod=__name__) -async def copy_file(source_path: StrOrBytesPath, dest_path: StrOrBytesPath, - check_content: bool = True) -> None: +async def copy_file( + source_path: StrOrBytesPath, dest_path: StrOrBytesPath, check_content: bool = True +) -> None: """ Copy content from one file to another. From 79bfee3a434d773c0a810d1d5fb1de0ce73daf45 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Tue, 2 May 2023 20:52:32 +0200 Subject: [PATCH 10/10] Update changelog. --- changelogs/fragments/78-collection-cache.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/changelogs/fragments/78-collection-cache.yml b/changelogs/fragments/78-collection-cache.yml index 3d789c5..4fdcce2 100644 --- a/changelogs/fragments/78-collection-cache.yml +++ b/changelogs/fragments/78-collection-cache.yml @@ -1,3 +1,4 @@ minor_changes: - "Avoid using the collection artifact filename returned by the Galaxy server. Instead compose it in a uniform way (https://github.com/ansible-community/antsibull-core/pull/78)." - "Allow the Galaxy downloader to trust its collection cache to avoid having to query the Galaxy server if an artifact exists in the cache. This can be set with the new configuration file option ``trust_collection_cache`` (https://github.com/ansible-community/antsibull-core/pull/78)." + - "Allow to skip content check when doing async file copying using ``antsibull_core.utils.io.copy_file()`` (https://github.com/ansible-community/antsibull-core/pull/78)."