Skip to content

Commit

Permalink
Support zstd-compressed repositories
Browse files Browse the repository at this point in the history
A repository may now be compressed using zstd. Extend support
to allow for this, in addition to continuing to support
gzip-encoded repositories.
  • Loading branch information
nicois committed Jul 2, 2024
1 parent 1b38cfd commit d275477
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"python-dateutil>=2.8.1,<3",
"botocore>=1.23.50,<2",
"lxml>=4.6.5,<5",
"zstd>=1.5.5.1",
]

[project.optional-dependencies]
Expand Down
1 change: 1 addition & 0 deletions rpm_s3_mirror.spec
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Requires: python3-requests
Requires: python3-dateutil
Requires: python3-botocore
Requires: python3-lxml
Requires: python3-zstd
Requires: systemd
Requires: zchunk

Expand Down
39 changes: 24 additions & 15 deletions rpm_s3_mirror/repository.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/
import dataclasses
import lzma
import zstd
import re
import subprocess
from abc import abstractmethod
Expand All @@ -16,6 +17,7 @@
from tempfile import TemporaryDirectory
import os
import shutil
from pathlib import Path
from os.path import join, basename

import gzip
Expand Down Expand Up @@ -211,6 +213,15 @@ def _compress(self, root, open_size, open_checksum):
)


def decompress(filename: Path | str) -> bytes:
    """Return the decompressed contents of *filename*.

    The raw bytes are first handed to ``zstd.decompress``. If the zstd
    library rejects them with ``zstd.Error``, the file is reopened with
    ``gzip.open`` and read that way instead, so both zstd- and
    gzip-compressed files are accepted.
    """
    with open(filename, "rb") as raw_file:
        payload = raw_file.read()
    try:
        return zstd.decompress(payload)
    except zstd.Error:
        # Not decodable as zstd: fall back to treating the file as gzip.
        with gzip.open(filename) as gzip_file:
            return gzip_file.read()


class ZCKUpdateInfoSection(UpdateInfoSection):
def _read(self):
return subprocess.check_output(["unzck", self.path, "--stdout"])
Expand Down Expand Up @@ -348,19 +359,18 @@ def create_snapshot(self, scratch_dir):
def _rewrite_primary(self, temp_dir, primary: RepodataSection):
with self._req(self.session.get, path=primary.location, stream=True) as request:
local_path = download_repodata_section(primary, request, temp_dir)
with gzip.open(local_path) as f:
file_bytes = f.read()
primary_xml = safe_parse_xml(xml_bytes=file_bytes)
open_checksum = sha256(content=file_bytes)
open_size = len(file_bytes)
for package_element in primary_xml:
location = package_element.find("common:location", namespaces=namespaces)
# As our S3 structure is https://<base-repo>/snapshots/<snapshot-uuid>/, and the "location"
# attribute of the packages in primary.xml references a path relative to the root like:
# "Packages/v/vim.rpm", we need to rewrite this location to point back a few directories
# from our snapshot root.
relative_location = f"../../{location.get('href')}"
location.set("href", relative_location)
file_bytes = decompress(local_path)
primary_xml = safe_parse_xml(xml_bytes=file_bytes)
open_checksum = sha256(content=file_bytes)
open_size = len(file_bytes)
for package_element in primary_xml:
location = package_element.find("common:location", namespaces=namespaces)
# As our S3 structure is https://<base-repo>/snapshots/<snapshot-uuid>/, and the "location"
# attribute of the packages in primary.xml references a path relative to the root like:
# "Packages/v/vim.rpm", we need to rewrite this location to point back a few directories
# from our snapshot root.
relative_location = f"../../{location.get('href')}"
location.set("href", relative_location)

# Now we have rewritten our XML file the checksums no longer match, so calculate some new ones (along with
# size etc from above).
Expand Down Expand Up @@ -404,8 +414,7 @@ def _extract_package_list(self, primary: RepodataSection) -> PackageList:
with self._req(self.session.get, path=primary.location, stream=True) as request:
with TemporaryDirectory(prefix="/var/tmp/") as temp_dir:
local_path = download_repodata_section(primary, request, temp_dir)
with gzip.open(local_path) as f:
return PackageList(base_url=self.base_url, packages_xml=f.read())
return PackageList(base_url=self.base_url, packages_xml=decompress(local_path))

def parse_repomd(self, xml: Element) -> Dict[str, RepodataSection]:
sections = {}
Expand Down

0 comments on commit d275477

Please sign in to comment.