Skip to content

Commit

Permalink
Add ability to publish package indices using AptByHash format
Browse files Browse the repository at this point in the history
closes pulp#795
  • Loading branch information
Andrew Cheng authored and daviddavis committed Nov 20, 2023
1 parent 18896c8 commit ced4f49
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGES/795.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added an option to publish `by-hash/` files to mitigate the Hash Sum Mismatch error in debian repos as specified here: https://wiki.debian.org/DebianRepository/Format#indices_acquisition_via_hashsums_.28by-hash.29.
Use the APT_BY_HASH setting to enable this feature.
16 changes: 16 additions & 0 deletions docs/feature_overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,22 @@ That way, the repository version created in the target repository, can be meanin
We are also planning to expand the advanced copy feature with a :ref:`dependency solving <dependency_solving>` mechanism in the future.


.. _apt_by_hash:

AptByHash
--------------------------------------------------------------------------------

AptByHash is a feature that mitigates commonplace 'Hash Sum Mismatch' errors during an 'apt-get update'.
It adds the checksum of the package metadata to the packages' names.
These files are then stored within a 'by-hash' directory within each release architecture in the specified debian repository.
The client will then use the filename to identify the expected checksum and download a file whose name matches the checksum.

Please note that this feature is disabled by default and should be enabled prior to use.
To do this, set `APT_BY_HASH = True` in `/pulp_deb/app/settings.py`.

In addition, you are responsible for setting up a reverse proxy with cache in order to cache the by-hash files.


Roadmap and Experimental
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions pulp_deb/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
http://docs.pulpproject.org/en/3.0/nightly/plugins/plugin-writer/index.html
"""

APT_BY_HASH = False
FORBIDDEN_CHECKSUM_WARNINGS = True
FORCE_IGNORE_MISSING_PACKAGE_INDICES = False
27 changes: 27 additions & 0 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
from contextlib import suppress
from pathlib import Path

from datetime import datetime, timezone
from debian import deb822
Expand Down Expand Up @@ -44,6 +45,7 @@
CHECKSUM_TYPE_MAP,
)

from pulp_deb.app.settings import APT_BY_HASH

import logging
from gettext import gettext as _
Expand Down Expand Up @@ -352,6 +354,23 @@ def finish(self):
publication=self.parent.publication, file=File(open(gz_package_index_path, "rb"))
)
gz_package_index.save()

# Generating metadata files using checksum
if APT_BY_HASH:
for path, index in (
(package_index_path, package_index),
(gz_package_index_path, gz_package_index),
):
for checksum in settings.ALLOWED_CONTENT_CHECKSUMS:
if checksum in CHECKSUM_TYPE_MAP:
hashed_index_path = _fetch_file_checksum(path, index, checksum)
hashed_index = PublishedMetadata.create_from_file(
publication=self.parent.publication,
file=File(open(path, "rb")),
relative_path=hashed_index_path,
)
hashed_index.save()

self.parent.add_metadata(package_index)
self.parent.add_metadata(gz_package_index)
# Publish Sources Indices file
Expand Down Expand Up @@ -406,6 +425,7 @@ def __init__(
self.release["Components"] = "" # Will be set later
if release.description != NULL_VALUE:
self.release["Description"] = release.description
self.release["Acquire-By-Hash"] = "yes" if APT_BY_HASH else "no"

for checksum_type, deb_field in CHECKSUM_TYPE_MAP.items():
if checksum_type in settings.ALLOWED_CONTENT_CHECKSUMS:
Expand Down Expand Up @@ -480,3 +500,10 @@ def _zip_file(file_path):
with GzipFile(gz_file_path, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
return gz_file_path


def _fetch_file_checksum(file_path, index, checksum):
    """Return the ``by-hash`` location for an index file.

    Despite the name, the result is a path, not a bare checksum: it is the
    directory containing ``file_path``, followed by ``by-hash``, the
    ``CHECKSUM_TYPE_MAP`` entry for ``checksum``, and the digest itself.

    Args:
        file_path: Path of the index file; only its parent directory is used.
        index: Object exposing ``contentartifact_set``; the artifact of its
            first entry supplies the digest.
        checksum: Attribute name read from that artifact, and the key looked
            up in ``CHECKSUM_TYPE_MAP``.

    Returns:
        A ``pathlib.Path`` of the form
        ``<parent of file_path>/by-hash/<mapped checksum type>/<digest>``.
    """
    artifact = index.contentartifact_set.first().artifact
    # Read the digest before consulting the map so failures surface in the
    # same order as they always have.
    digest = getattr(artifact, checksum)
    by_hash_dir = Path(file_path).parents[0] / "by-hash" / CHECKSUM_TYPE_MAP[checksum]
    return by_hash_dir / digest

0 comments on commit ced4f49

Please sign in to comment.