From ced4f49414ec3c09ffd88fbc1673efc2d7443e1d Mon Sep 17 00:00:00 2001 From: Andrew Cheng Date: Fri, 25 Aug 2023 12:12:58 -0700 Subject: [PATCH] Add ability to publish package indices using AptByHash format closes #795 --- CHANGES/795.feature | 2 ++ docs/feature_overview.rst | 16 ++++++++++++++++ pulp_deb/app/settings.py | 1 + pulp_deb/app/tasks/publishing.py | 27 +++++++++++++++++++++++++++ 4 files changed, 46 insertions(+) create mode 100644 CHANGES/795.feature diff --git a/CHANGES/795.feature b/CHANGES/795.feature new file mode 100644 index 000000000..0aa1b13d1 --- /dev/null +++ b/CHANGES/795.feature @@ -0,0 +1,2 @@ +Added an option to publish `by-hash/` files to mitigate the Hash Sum Mismatch error in debian repos as specified here: https://wiki.debian.org/DebianRepository/Format#indices_acquisition_via_hashsums_.28by-hash.29. +Use the APT_BY_HASH setting to enable this feature. diff --git a/docs/feature_overview.rst b/docs/feature_overview.rst index b366aac4d..7a8dbd82c 100644 --- a/docs/feature_overview.rst +++ b/docs/feature_overview.rst @@ -144,6 +144,22 @@ That way, the repository version created in the target repository, can be meanin We are also planning to expand the advanced copy feature with a :ref:`dependency solving ` mechanism in the future. +.. _apt_by_hash: + +AptByHash +-------------------------------------------------------------------------------- + +AptByHash is a feature that mitigates commonplace 'Hash Sum Mismatch' errors during an 'apt-get update'. +It adds the checksum of the package metadata to the packages' names. +These files are then stored within a 'by-hash' directory within each release architecture in the specified debian repository. +The client will then use the filename to identify the expected checksum and download a file whose name matches the checksum. + +Please note that this feature is disabled by default and should be enabled prior to use. +To do this, set `APT_BY_HASH = True` in `/pulp_deb/app/settings.py`. 
+ +In addition, you are responsible for setting up a reverse proxy with cache in order to cache the by-hash files. + + Roadmap and Experimental ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pulp_deb/app/settings.py b/pulp_deb/app/settings.py index 65260a130..5f0614561 100644 --- a/pulp_deb/app/settings.py +++ b/pulp_deb/app/settings.py @@ -5,5 +5,6 @@ http://docs.pulpproject.org/en/3.0/nightly/plugins/plugin-writer/index.html """ +APT_BY_HASH = False FORBIDDEN_CHECKSUM_WARNINGS = True FORCE_IGNORE_MISSING_PACKAGE_INDICES = False diff --git a/pulp_deb/app/tasks/publishing.py b/pulp_deb/app/tasks/publishing.py index 3a7313f39..bc1cdabe8 100644 --- a/pulp_deb/app/tasks/publishing.py +++ b/pulp_deb/app/tasks/publishing.py @@ -2,6 +2,7 @@ import os import shutil from contextlib import suppress +from pathlib import Path from datetime import datetime, timezone from debian import deb822 @@ -44,6 +45,7 @@ CHECKSUM_TYPE_MAP, ) +from pulp_deb.app.settings import APT_BY_HASH import logging from gettext import gettext as _ @@ -352,6 +354,23 @@ def finish(self): publication=self.parent.publication, file=File(open(gz_package_index_path, "rb")) ) gz_package_index.save() + + # Generating metadata files using checksum + if APT_BY_HASH: + for path, index in ( + (package_index_path, package_index), + (gz_package_index_path, gz_package_index), + ): + for checksum in settings.ALLOWED_CONTENT_CHECKSUMS: + if checksum in CHECKSUM_TYPE_MAP: + hashed_index_path = _fetch_file_checksum(path, index, checksum) + hashed_index = PublishedMetadata.create_from_file( + publication=self.parent.publication, + file=File(open(path, "rb")), + relative_path=hashed_index_path, + ) + hashed_index.save() + self.parent.add_metadata(package_index) self.parent.add_metadata(gz_package_index) # Publish Sources Indices file @@ -406,6 +425,7 @@ def __init__( self.release["Components"] = "" # Will be set later if release.description != NULL_VALUE: 
self.release["Description"] = release.description + self.release["Acquire-By-Hash"] = "yes" if APT_BY_HASH else "no" for checksum_type, deb_field in CHECKSUM_TYPE_MAP.items(): if checksum_type in settings.ALLOWED_CONTENT_CHECKSUMS: @@ -480,3 +500,10 @@ def _zip_file(file_path): with GzipFile(gz_file_path, "wb") as f_out: shutil.copyfileobj(f_in, f_out) return gz_file_path + + +def _fetch_file_checksum(file_path, index, checksum): + digest = getattr(index.contentartifact_set.first().artifact, checksum) + checksum_type = CHECKSUM_TYPE_MAP[checksum] + hashed_path = Path(file_path).parents[0] / "by-hash" / checksum_type / digest + return hashed_path