Allow Galaxy downloader to trust collection cache (#78)
* Normalize the collection cache filename so we don't depend on Galaxy servers always using the same method.

* Allow trusting the collection cache.

* Improve docs, add to sample config.

* Follow-up to previous PR.

* Fix typo.

* Improvements from review.

* Use antsibull_core.utils.io.copy_file instead of shutil.copyfile.

* Skip check part of copying.

* Reformat.

* Update changelog.
felixfontein authored May 2, 2023
1 parent 21f0da7 commit 59f1d05
Showing 5 changed files with 55 additions and 22 deletions.
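Taken together, these changes let a caller opt in to trusting previously cached collection artifacts. Below is a minimal sketch of how the new option might be used from library code; it assumes the downloader class is CollectionDownloader from antsibull_core.galaxy (only its __init__ and download methods appear in the diff below), that an antsibull library context is already established as it is when the real antsibull CLI tools run, and the session setup and paths are placeholders.

import asyncio
import os

import aiohttp

from antsibull_core.galaxy import CollectionDownloader


async def fetch_collection() -> str:
    # Sketch only: the downloader reads defaults such as collection_cache and
    # trust_collection_cache from the antsibull library context when the
    # keyword arguments are left unset.
    async with aiohttp.ClientSession() as aio_session:
        downloader = CollectionDownloader(
            aio_session,
            download_dir="/tmp/antsibull-build",  # placeholder path
            collection_cache=os.path.expanduser("~/.cache/antsibull/collections"),
            trust_collection_cache=True,  # skip the Galaxy checksum round-trip
        )
        # Returns the path to the downloaded (or cache-copied) tarball.
        return await downloader.download("community.general", "6.6.0")


print(asyncio.run(fetch_collection()))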
8 changes: 7 additions & 1 deletion antsibull.cfg
@@ -2,7 +2,7 @@
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

ansible_base_url = https://github.com/ansible/ansible/
ansible_core_repo_url = https://github.com/ansible/ansible/
# Number of bytes to read or write at one time for network or file I/O:
chunksize = 4096
galaxy_url = https://galaxy.ansible.com/
@@ -23,6 +23,12 @@ file_check_content = 262144
# to cache downloaded collection artefacts from Ansible Galaxy in that directory
# and speed up the docs build time:
# collection_cache = ~/.cache/antsibull/collections
# Uncomment the following to trust the collection cache. This means that antsibull-core
# will assume that if the collection cache contains an artifact, it is the current one
# available on the Galaxy server. This avoids making a request to the Galaxy server to
# figure out the artifact's checksum and comparing it before trusting the cached
# artifact.
# trust_collection_cache = true
logging_cfg = {
version = 1.0
outputs = {
4 changes: 4 additions & 0 deletions changelogs/fragments/78-collection-cache.yml
@@ -0,0 +1,4 @@
minor_changes:
- "Avoid using the collection artifact filename returned by the Galaxy server. Instead compose it in a uniform way (https://github.com/ansible-community/antsibull-core/pull/78)."
- "Allow the Galaxy downloader to trust its collection cache to avoid having to query the Galaxy server if an artifact exists in the cache. This can be set with the new configuration file option ``trust_collection_cache`` (https://github.com/ansible-community/antsibull-core/pull/78)."
- "Allow to skip content check when doing async file copying using ``antsibull_core.utils.io.copy_file()`` (https://github.com/ansible-community/antsibull-core/pull/78)."
45 changes: 27 additions & 18 deletions src/antsibull_core/galaxy.py
@@ -10,7 +10,6 @@
import asyncio
import dataclasses
import os.path
import shutil
import typing as t
from enum import Enum
from urllib.parse import urljoin
@@ -21,6 +20,7 @@
from . import app_context
from .utils.hashing import verify_hash
from .utils.http import retry_get
from .utils.io import copy_file

# The type checker can handle finding aiohttp.client but flake8 cannot :-(
if t.TYPE_CHECKING:
@@ -384,6 +384,7 @@ def __init__(
galaxy_server: t.Optional[str] = None,
collection_cache: str | None = None,
context: t.Optional[GalaxyContext] = None,
trust_collection_cache: t.Optional[bool] = None,
) -> None:
"""
Create an object to download collections from galaxy.
@@ -398,12 +399,20 @@ def __init__(
These tarballs will be used instead of downloading new tarballs provided that the
versions match the criteria (latest compatible version known to galaxy).
Defaults to ``lib_ctx.get().collection_cache``.
:kwarg trust_collection_cache: If set to ``True``, will assume that if the collection
cache contains an artifact, it is the current one available on the Galaxy server.
This avoids making a request to the Galaxy server to figure out the artifact's
            checksum and comparing it before trusting the cached artifact.
"""
super().__init__(aio_session, galaxy_server=galaxy_server, context=context)
self.download_dir = download_dir
lib_ctx = app_context.lib_ctx.get()
if collection_cache is None:
collection_cache = app_context.lib_ctx.get().collection_cache
collection_cache = lib_ctx.collection_cache
self.collection_cache: t.Final[str | None] = collection_cache
if trust_collection_cache is None:
trust_collection_cache = lib_ctx.trust_collection_cache
self.trust_collection_cache: t.Final[bool] = trust_collection_cache

async def download(
self,
@@ -419,24 +428,26 @@ async def download(
:arg version: Version of the collection to download.
:returns: The full path to the downloaded collection.
"""
collection = collection.replace(".", "/")
release_info = await self.get_release_info(collection, version)
namespace, name = collection.split(".", 1)
filename = f"{namespace}-{name}-{version}.tar.gz"
download_filename = os.path.join(self.download_dir, filename)

if self.collection_cache and self.trust_collection_cache:
cached_copy = os.path.join(self.collection_cache, filename)
if os.path.isfile(cached_copy):
await copy_file(cached_copy, download_filename, check_content=False)
return download_filename

release_info = await self.get_release_info(f"{namespace}/{name}", version)
release_url = release_info["download_url"]

download_filename = os.path.join(
self.download_dir, release_info["artifact"]["filename"]
)
sha256sum = release_info["artifact"]["sha256"]

if self.collection_cache:
if release_info["artifact"]["filename"] in os.listdir(
self.collection_cache
):
cached_copy = os.path.join(
self.collection_cache, release_info["artifact"]["filename"]
)
cached_copy = os.path.join(self.collection_cache, filename)
if os.path.isfile(cached_copy):
if await verify_hash(cached_copy, sha256sum):
shutil.copyfile(cached_copy, download_filename)
await copy_file(cached_copy, download_filename, check_content=False)
return download_filename

async with retry_get(
@@ -459,10 +470,8 @@

# Copy downloaded collection into cache
if self.collection_cache:
cached_copy = os.path.join(
self.collection_cache, release_info["artifact"]["filename"]
)
shutil.copyfile(download_filename, cached_copy)
cached_copy = os.path.join(self.collection_cache, filename)
await copy_file(download_filename, cached_copy, check_content=False)

return download_filename
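To summarize the control flow the diff above introduces: with a trusted cache, a cache hit is copied straight into the download directory without contacting Galaxy; otherwise a cached copy is only reused after its SHA-256 matches the value reported by the Galaxy API, and a fresh download is written back into the cache. A condensed, synchronous restatement of that decision logic follows; it is illustrative only, since the real method is async, streams the download, and obtains the expected checksum from the release info.

import hashlib
import os
import shutil
import typing as t


def resolve_artifact(cache_dir: t.Optional[str], download_dir: str, filename: str,
                     expected_sha256: str, trust_cache: bool) -> t.Optional[str]:
    """Illustrative restatement of the cache handling sketched in the diff above."""
    target = os.path.join(download_dir, filename)
    if cache_dir:
        cached_copy = os.path.join(cache_dir, filename)
        if os.path.isfile(cached_copy):
            if trust_cache:
                # Trusted cache: no checksum round-trip to the Galaxy server.
                shutil.copyfile(cached_copy, target)
                return target
            with open(cached_copy, "rb") as f:
                # The real code hashes the file in chunks instead of reading it whole.
                if hashlib.sha256(f.read()).hexdigest() == expected_sha256:
                    # Verified cache hit.
                    shutil.copyfile(cached_copy, target)
                    return target
    # None signals "not satisfied from cache"; the real code downloads the
    # artifact from Galaxy here and then copies it back into cache_dir.
    return None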

11 changes: 10 additions & 1 deletion src/antsibull_core/schemas/context.py
@@ -11,7 +11,7 @@

from ..utils.collections import ContextDict
from .config import DEFAULT_LOGGING_CONFIG, LoggingModel
from .validators import convert_none, convert_path
from .validators import convert_bool, convert_none, convert_path


class BaseModel(p.BaseModel):
@@ -111,6 +111,10 @@ class LibContext(BaseModel):
:ivar pypi_url: URL of the pypi server to query for information
:ivar collection_cache: If set, must be a path pointing to a directory where collection
tarballs are cached so they do not need to be downloaded from Galaxy twice.
:ivar trust_collection_cache: If set to ``True``, will assume that if the collection
cache contains an artifact, it is the current one available on the Galaxy server.
This avoids making a request to the Galaxy server to figure out the artifact's
checksum and comparing it before trusting the cached artifact.
"""

chunksize: int = 4096
@@ -131,6 +135,7 @@
# pyre-ignore[8]: https://github.com/samuelcolvin/pydantic/issues/1684
pypi_url: p.HttpUrl = "https://pypi.org/" # type: ignore[assignment]
collection_cache: t.Optional[str] = None
trust_collection_cache: bool = False

# pylint: disable-next=unused-private-member
__convert_nones = p.validator("process_max", pre=True, allow_reuse=True)(
@@ -140,3 +145,7 @@
__convert_paths = p.validator("collection_cache", pre=True, allow_reuse=True)(
convert_path
)
# pylint: disable-next=unused-private-member
__convert_bools = p.validator("trust_collection_cache", pre=True, allow_reuse=True)(
convert_bool
)
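With the new trust_collection_cache field and its convert_bool pre-validator in place, a string value such as "true" coming from antsibull.cfg coerces to a proper boolean before the rest of the library sees it. A small check of that behaviour; constructing LibContext directly like this is just for illustration, since in practice the value arrives via the loaded configuration.

from antsibull_core.schemas.context import LibContext

# "true" as it would appear in antsibull.cfg becomes a real bool on the model.
ctx = LibContext(trust_collection_cache="true")
assert ctx.trust_collection_cache is True
assert LibContext().trust_collection_cache is False  # the default stays off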
9 changes: 7 additions & 2 deletions src/antsibull_core/utils/io.py
@@ -22,18 +22,23 @@
mlog = log.fields(mod=__name__)


async def copy_file(source_path: StrOrBytesPath, dest_path: StrOrBytesPath) -> None:
async def copy_file(
source_path: StrOrBytesPath, dest_path: StrOrBytesPath, check_content: bool = True
) -> None:
"""
Copy content from one file to another.
:arg source_path: Source path. Must be a file.
:arg dest_path: Destination path.
:kwarg check_content: If ``True`` (default) and ``lib_ctx.file_check_content > 0`` and the
destination file exists, first check whether source and destination are potentially equal
        before actually copying.
"""
flog = mlog.fields(func="copy_file")
flog.debug("Enter")

lib_ctx = app_context.lib_ctx.get()
if lib_ctx.file_check_content > 0:
if check_content and lib_ctx.file_check_content > 0:
# Check whether the destination file exists and has the same content as the source file,
# in which case we won't overwrite the destination file
try:
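The new check_content flag lets a caller force the copy even when the content-comparison optimization is configured, which is what the Galaxy downloader above does for cache hits it has decided to trust or has just verified. A short usage sketch; the paths are placeholders, and as above it assumes an antsibull library context is active so that lib_ctx can be read inside copy_file.

import asyncio

from antsibull_core.utils.io import copy_file


async def main() -> None:
    # check_content=False skips the "is the destination already identical?"
    # probe and simply copies; with the default of True the existing
    # file_check_content heuristic still applies.
    await copy_file(
        "/tmp/cache/community-general-6.6.0.tar.gz",   # placeholder source
        "/tmp/build/community-general-6.6.0.tar.gz",   # placeholder destination
        check_content=False,
    )


asyncio.run(main())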
