Skip to content

Commit

Permalink
Merge pull request #891 from ATIX-AG/advanced_copy_performance
Browse files Browse the repository at this point in the history
Improve advanced copy performance

(cherry picked from commit 2cf0106)
  • Loading branch information
quba42 authored and patchback[bot] committed Sep 28, 2023
1 parent 300e723 commit f7347a3
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 73 deletions.
1 change: 1 addition & 0 deletions CHANGES/870.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved the performance of structured ``/pulp/api/v3/deb/copy/`` actions.
118 changes: 54 additions & 64 deletions pulp_deb/app/tasks/copy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.db import transaction
from django.db.models import Q

from pulpcore.plugin.models import Content, RepositoryVersion
from pulpcore.plugin.models import RepositoryVersion

from pulp_deb.app.models import (
AptRepository,
Expand All @@ -17,7 +17,7 @@
log = logging.getLogger(__name__)


def find_structured_publish_content(content, src_repo_version):
def find_structured_publish_content(content, source_repo_version):
"""
Finds the content for structured publish from packages to be copied and returns it all together.
Expand All @@ -27,43 +27,42 @@ def find_structured_publish_content(content, src_repo_version):
Returns: Queryset of Content objects that extends the initial set of content for structured publish
"""
# Content in the source repository version
package_release_component_ids = src_repo_version.content.filter(
pulp_type=PackageReleaseComponent.get_pulp_type()
).only("pk")
architecture_ids = src_repo_version.content.filter(
pulp_type=ReleaseArchitecture.get_pulp_type()
).only("pk")
package_release_components = PackageReleaseComponent.objects.filter(
pk__in=package_release_component_ids
)
# Packages:
package_content_qs = content.filter(pulp_type=Package.get_pulp_type()).only("pk")
package_qs = Package.objects.filter(pk__in=package_content_qs)

structured_publish_content = set()
# PackageReleaseComponents:
package_prc_qs = PackageReleaseComponent.objects.filter(package__in=package_qs.only("pk")).only(
"pk"
)
prc_content_qs = source_repo_version.content.filter(pk__in=package_prc_qs)
prc_qs = PackageReleaseComponent.objects.filter(pk__in=prc_content_qs.only("pk"))

# ReleaseComponents:
release_component_ids = set()
distributions = set()
for prc in prc_qs.select_related("release_component").iterator():
release_component_ids.add(prc.release_component.pk)
distributions.add(prc.release_component.distribution)

release_component_content_qs = source_repo_version.content.filter(
pk__in=release_component_ids
).only("pk")

# Packages to be copied
packages = Package.objects.filter(pk__in=content)
structured_publish_content.update(packages.values_list("pk", flat=True))
# ReleaseArchitectures:
architectures = list(package_qs.values_list("architecture", flat=True).distinct())
architecture_qs = ReleaseArchitecture.objects.filter(
architecture__in=architectures, distribution__in=distributions
).only("pk")

if len(content) != len(packages):
log.warning(_("Additional data with packages is provided. Removing from the content list."))
# Releases:
release_qs = Release.objects.filter(distribution__in=distributions).only("pk")

# List of all architectures
architectures = ReleaseArchitecture.objects.filter(pk__in=architecture_ids).values_list(
"pk", flat=True
combined_content_qs = content.only("pk").union(
prc_qs.only("pk"), release_component_content_qs, architecture_qs, release_qs
)
structured_publish_content.update(architectures)

# Package release components, release components, release to be copied based on packages
for pckg in package_release_components.iterator():
if pckg.package in packages:
structured_publish_content.update([pckg.pk, pckg.release_component.pk])
release = Release.objects.filter(
pk__in=src_repo_version.content, distribution=pckg.release_component.distribution
).first()
if release:
structured_publish_content.update([release.pk])

return Content.objects.filter(pk__in=structured_publish_content)
return source_repo_version.content.filter(pk__in=combined_content_qs)


@transaction.atomic
Expand All @@ -82,12 +81,11 @@ def copy_content(config, structured, dependency_solving):
def process_entry(entry):
source_repo_version = RepositoryVersion.objects.get(pk=entry["source_repo_version"])
dest_repo = AptRepository.objects.get(pk=entry["dest_repo"])

dest_version_provided = bool(entry.get("dest_base_version"))
if dest_version_provided:
dest_repo_version = RepositoryVersion.objects.get(pk=entry["dest_base_version"])
else:
dest_repo_version = dest_repo.latest_version()
dest_base_version = (
RepositoryVersion.objects.get(pk=entry["dest_base_version"])
if bool(entry.get("dest_base_version"))
else None
)

if entry.get("content") is not None:
content_filter = Q(pk__in=entry.get("content"))
Expand All @@ -98,33 +96,25 @@ def process_entry(entry):

return (
source_repo_version,
dest_repo_version,
dest_repo,
dest_base_version,
content_filter,
dest_version_provided,
)

if not dependency_solving:
# No Dependency Solving Branch
# ============================
for entry in config:
(
source_repo_version,
dest_repo_version,
dest_repo,
content_filter,
dest_version_provided,
) = process_entry(entry)

content_to_copy = source_repo_version.content.filter(content_filter)
if structured:
content_to_copy = find_structured_publish_content(
content_to_copy, source_repo_version
)

base_version = dest_repo_version if dest_version_provided else None

with dest_repo.new_version(base_version=base_version) as new_version:
new_version.add_content(content_to_copy)
else:
if dependency_solving:
raise NotImplementedError("Advanced copy with dependency solving is not yet implemented.")

for entry in config:
(
source_repo_version,
dest_repo,
dest_base_version,
content_filter,
) = process_entry(entry)

content_to_copy = source_repo_version.content.filter(content_filter)
if structured:
content_to_copy = find_structured_publish_content(content_to_copy, source_repo_version)

with dest_repo.new_version(base_version=dest_base_version) as new_version:
new_version.add_content(content_to_copy)
2 changes: 0 additions & 2 deletions pulp_deb/tests/functional/api/test_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ def test_copy_all(
assert DEB_FULL_ADVANCED_COPY_FIXTURE_SUMMARY == deb_get_added_content_summary(target_repo)


# FIXME: Can be enabled once the following is fixed: https://github.com/pulp/pulp_deb/issues/870
@pytest.mark.skip("Skip - due to faulty behaviour in the copy task.")
@pytest.mark.parallel
def test_copy_empty_content(
deb_init_and_sync,
Expand Down
10 changes: 3 additions & 7 deletions pulp_deb/tests/functional/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ def _clean_dict(d):
DEB_ADVANCED_COPY_FIXTURE_SUMMARY = _clean_dict(
{
DEB_RELEASE_NAME: 2,
# FIXME: this value needs to be adapted once the copy task is refactored.
# see: https://github.com/pulp/pulp_deb/issues/870
DEB_RELEASE_ARCHITECTURE_NAME: 3,
DEB_RELEASE_ARCHITECTURE_NAME: 2,
DEB_RELEASE_COMPONENT_NAME: 2,
DEB_RELEASE_FILE_NAME: 0,
DEB_PACKAGE_INDEX_NAME: 0,
Expand All @@ -93,12 +91,10 @@ def _clean_dict(d):
DEB_FULL_ADVANCED_COPY_FIXTURE_SUMMARY = _clean_dict(
{
DEB_RELEASE_NAME: 2,
# FIXME: this value needs to be adapted once the copy task is refactored.
# see: https://github.com/pulp/pulp_deb/issues/870
DEB_RELEASE_ARCHITECTURE_NAME: 3,
DEB_RELEASE_COMPONENT_NAME: 3,
DEB_RELEASE_FILE_NAME: 0,
DEB_PACKAGE_INDEX_NAME: 0,
DEB_RELEASE_FILE_NAME: 2,
DEB_PACKAGE_INDEX_NAME: 5,
DEB_PACKAGE_RELEASE_COMPONENT_NAME: 7,
DEB_INSTALLER_FILE_INDEX_NAME: 0,
DEB_PACKAGE_NAME: 4,
Expand Down

0 comments on commit f7347a3

Please sign in to comment.