Skip to content

Commit

Permalink
Generating checksum named metadata files for AptByHash (squashed)
Browse files Browse the repository at this point in the history
closes pulp#795
  • Loading branch information
adamsanaglo committed Jul 24, 2023
1 parent b620904 commit 88c8829
Show file tree
Hide file tree
Showing 11 changed files with 39 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGES/795.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Mitigate the "Hash Sum Mismatch" error in Debian repositories by generating additional metadata files that are named by their checksum.
Documentation of the feature: https://wiki.ubuntu.com/AptByHash
1 change: 1 addition & 0 deletions functest_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pulp-smash @ git+https://github.com/pulp/pulp-smash.git
pytest
python-debian>=0.1.36
epdb
Binary file added pulp_deb/app/__pycache__/constants.cpython-38.pyc
Binary file not shown.
Binary file added pulp_deb/app/__pycache__/settings.cpython-38.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions pulp_deb/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@

PUBLISH_RELEASE_FILE_LABEL = False
PUBLISH_RELEASE_FILE_VERSION = False
# When true, publishing also creates copies of the package index files that
# are named by their checksum (see https://wiki.ubuntu.com/AptByHash).
APT_BY_HASH = True
# hashlib algorithm name used to compute the checksum file names. The publish
# task only generates the copies when this value is also listed in
# ALLOWED_CONTENT_CHECKSUMS.
APT_BY_HASH_CHECKSUM_TYPE = "sha256"
Binary file not shown.
Binary file not shown.
38 changes: 34 additions & 4 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
import shutil
from contextlib import suppress
from pathlib import Path
import hashlib

from datetime import datetime, timezone
from debian import deb822
Expand Down Expand Up @@ -33,11 +35,9 @@

from pulp_deb.app.serializers import Package822Serializer

from pulp_deb.app.constants import (
NO_MD5_WARNING_MESSAGE,
CHECKSUM_TYPE_MAP,
)
from pulp_deb.app.constants import NO_MD5_WARNING_MESSAGE, CHECKSUM_TYPE_MAP

from pulp_deb.app.settings import APT_BY_HASH, APT_BY_HASH_CHECKSUM_TYPE

import logging
from gettext import gettext as _
Expand Down Expand Up @@ -247,6 +247,7 @@ def add_package(self, package):
content_artifact=package.contentartifact_set.get(),
)
published_artifact.save()

package_serializer = Package822Serializer(package, context={"request": None})
package_serializer.to822(self.component).dump(
self.package_index_files[package.architecture][0]
Expand All @@ -262,10 +263,23 @@ def finish(self):
publication=self.parent.publication, file=File(open(package_index_path, "rb"))
)
package_index.save()

gz_package_index = PublishedMetadata.create_from_file(
publication=self.parent.publication, file=File(open(gz_package_index_path, "rb"))
)
gz_package_index.save()

# Generating metadata files using checksum
if APT_BY_HASH and APT_BY_HASH_CHECKSUM_TYPE in settings.ALLOWED_CONTENT_CHECKSUMS:
for path in (package_index_path, gz_package_index_path):
hashed_index_path = _create_checksum_file(path)
hashed_index = PublishedMetadata.create_from_file(
publication=self.parent.publication,
file=File(open(hashed_index_path, "rb")),
)
hashed_index.save()
# Done generating

self.parent.add_metadata(package_index)
self.parent.add_metadata(gz_package_index)

Expand Down Expand Up @@ -383,3 +397,19 @@ def _zip_file(file_path):
with GzipFile(gz_file_path, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
return gz_file_path


def _checksum_file(path):
    """Return the hex digest of the file at ``path``.

    The digest algorithm is the one named by ``APT_BY_HASH_CHECKSUM_TYPE``.

    Args:
        path: Path of the file to hash; it is opened in binary mode.

    Returns:
        str: The hexadecimal digest of the file's content.
    """
    digest = hashlib.new(APT_BY_HASH_CHECKSUM_TYPE)
    chunk_size = 1024 * 1024
    with open(path, "rb") as f:
        # Read fixed-size chunks instead of iterating "lines": this is also
        # used on gzip-compressed indices, where newline bytes appear at
        # arbitrary positions (or never), so a single "line" could be the
        # whole file held in memory at once.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _create_checksum_file(file_path):
    """Copy ``file_path`` to its by-hash location and return the new path.

    The copy is written next to the original file as
    ``by-hash/<HashName>/<hexdigest>``, which is the layout apt clients
    request when fetching index files by hash. The previous ``byhash/<hexdigest>``
    layout omitted both the hyphen and the hash-name directory.

    Args:
        file_path: Path of the metadata file to duplicate.

    Returns:
        pathlib.Path: Path of the newly written checksum-named copy.
    """
    # apt names the directory after the Release file field of the hash, not
    # after the hashlib algorithm name.
    hash_dir_names = {
        "md5": "MD5Sum",
        "sha1": "SHA1",
        "sha256": "SHA256",
        "sha512": "SHA512",
    }
    hash_dir_name = hash_dir_names.get(
        APT_BY_HASH_CHECKSUM_TYPE, APT_BY_HASH_CHECKSUM_TYPE.upper()
    )
    by_hash_path = Path(file_path).parent / "by-hash" / hash_dir_name
    by_hash_path.mkdir(parents=True, exist_ok=True)
    hashed_path = by_hash_path / _checksum_file(file_path)
    shutil.copyfile(file_path, hashed_path)
    return hashed_path
Binary file not shown.

0 comments on commit 88c8829

Please sign in to comment.