From 2616e22a1f5a4841c7e20f759beac5d2da1c3b16 Mon Sep 17 00:00:00 2001 From: Manisha Singhal Date: Thu, 28 Oct 2021 09:35:15 +0200 Subject: [PATCH] Add experimental structured advanced copy workflow Closes #396 --- CHANGES/396.feature | 1 + MANIFEST.in | 1 + docs/feature_overview.rst | 18 +++ pulp_deb/app/schema/__init__.py | 7 + pulp_deb/app/schema/copy_config.json | 21 +++ pulp_deb/app/serializers/__init__.py | 2 +- .../app/serializers/repository_serializers.py | 59 ++++++++- pulp_deb/app/tasks/__init__.py | 1 + pulp_deb/app/tasks/copy.py | 121 ++++++++++++++++++ pulp_deb/app/urls.py | 5 + pulp_deb/app/viewsets/__init__.py | 2 +- pulp_deb/app/viewsets/repository.py | 84 ++++++++++++ 12 files changed, 319 insertions(+), 3 deletions(-) create mode 100644 CHANGES/396.feature create mode 100644 pulp_deb/app/schema/__init__.py create mode 100644 pulp_deb/app/schema/copy_config.json create mode 100644 pulp_deb/app/tasks/copy.py create mode 100644 pulp_deb/app/urls.py diff --git a/CHANGES/396.feature b/CHANGES/396.feature new file mode 100644 index 000000000..322d5ac51 --- /dev/null +++ b/CHANGES/396.feature @@ -0,0 +1 @@ +Added experimental advanced copy API with support for structured copying. diff --git a/MANIFEST.in b/MANIFEST.in index 4e1ac30b2..26017f666 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ include COMMITMENT include COPYRIGHT include functest_requirements.txt include LICENSE +include pulp_deb/app/schema/* include pulp_deb/tests/functional/sign_deb_release.sh include pyproject.toml include requirements.txt diff --git a/docs/feature_overview.rst b/docs/feature_overview.rst index 29b0b83d8..7032ad219 100644 --- a/docs/feature_overview.rst +++ b/docs/feature_overview.rst @@ -99,6 +99,24 @@ In general, uploading content works the same way as for any other Pulp plugin, s (This latter case can only happen if users attempt to add several colliding packages in a single API call.) +.. 
_advanced_copy: + +Advanced Copy (EXPERIMENTAL) +-------------------------------------------------------------------------------- + +.. warning:: + This paragraph describes an experimental feature. + It may not work as intended for every corner case, or may break unexpectedly. + The API may still change in backwards-incompatible ways. + +The plugin provides a special API endpoint at ``pulp/api/v3/deb/copy/`` that offers advanced copy operations for copying packages between repositories. +When specifying a set of packages to be copied from one repository into another (using the default ``structured=True`` setting), this copy operation will automatically add any metadata content associated with the packages in question. +That way, the repository version created in the target repository can be meaningfully published using :ref:`structured mode <simple_and_structured_publishing>`. + +We are planning to add a dependency solving mechanism in a future release. +The idea is for the copy operation to automatically add any dependencies of any user-supplied packages as part of the copy operation. + + .. 
_simple_and_structured_publishing: Simple and Structured Publishing diff --git a/pulp_deb/app/schema/__init__.py b/pulp_deb/app/schema/__init__.py new file mode 100644 index 000000000..8bb3489c4 --- /dev/null +++ b/pulp_deb/app/schema/__init__.py @@ -0,0 +1,7 @@ +import json +import os + +location = os.path.dirname(os.path.realpath(__file__)) + +with open(os.path.join(location, "copy_config.json")) as copy_config_json: + COPY_CONFIG_SCHEMA = json.load(copy_config_json) diff --git a/pulp_deb/app/schema/copy_config.json b/pulp_deb/app/schema/copy_config.json new file mode 100644 index 000000000..898bcfeda --- /dev/null +++ b/pulp_deb/app/schema/copy_config.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CopyConfig", + "description": "Config for copying content between repos", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": [ "source_repo_version", "dest_repo" ], + "properties": { + "source_repo_version": { "type": "string" }, + "dest_repo": { "type": "string" }, + "dest_base_version": { "type": "integer" }, + "content": { + "type": "array", + "items": { "type": "string" } + } + } + } +} diff --git a/pulp_deb/app/serializers/__init__.py b/pulp_deb/app/serializers/__init__.py index b6169fbc4..b792264f3 100644 --- a/pulp_deb/app/serializers/__init__.py +++ b/pulp_deb/app/serializers/__init__.py @@ -23,4 +23,4 @@ from .remote_serializers import AptRemoteSerializer -from .repository_serializers import AptRepositorySerializer +from .repository_serializers import AptRepositorySerializer, CopySerializer diff --git a/pulp_deb/app/serializers/repository_serializers.py b/pulp_deb/app/serializers/repository_serializers.py index 8f821c7bd..1b08daa18 100644 --- a/pulp_deb/app/serializers/repository_serializers.py +++ b/pulp_deb/app/serializers/repository_serializers.py @@ -1,7 +1,12 @@ -from pulpcore.plugin.serializers import RepositorySerializer +from gettext import 
gettext as _ +from pulpcore.plugin.serializers import RepositorySerializer, validate_unknown_fields from pulp_deb.app.models import AptRepository +from jsonschema import Draft7Validator +from rest_framework import serializers +from pulp_deb.app.schema import COPY_CONFIG_SCHEMA + class AptRepositorySerializer(RepositorySerializer): """ @@ -11,3 +16,55 @@ class AptRepositorySerializer(RepositorySerializer): class Meta: fields = RepositorySerializer.Meta.fields model = AptRepository + + +class CopySerializer(serializers.Serializer): + """ + A serializer for the Content Copy API. + """ + + config = serializers.JSONField( + help_text=_("A JSON document describing sources, destinations, and content to be copied") + ) + + structured = serializers.BooleanField( + help_text=_( + "Also copy any distributions, components, and releases as needed for any packages " + "being copied. This will allow for structured publications of the target repository. " + "Default is set to True" + ), + default=True, + ) + + dependency_solving = serializers.BooleanField( + help_text=_( + "Also copy dependencies of any packages being copied. NOT YET " + 'IMPLEMENTED! You must keep this at "False"!' + ), + default=False, + ) + + def validate(self, data): + """ + Validate that the Serializer contains valid data. + Check the initial data (when present) for unknown fields via validate_unknown_fields. + Validate the "config" (when present) against COPY_CONFIG_SCHEMA with a Draft7Validator. + Raise a ValidationError that lists the message of every schema violation found. 
+ """ + super().validate(data) + + if hasattr(self, "initial_data"): + validate_unknown_fields(self.initial_data, self.fields) + + if "config" in data: + validator = Draft7Validator(COPY_CONFIG_SCHEMA) + + err = [] + for error in sorted(validator.iter_errors(data["config"]), key=str): + err.append(error.message) + if err: + raise serializers.ValidationError( + _("Provided copy criteria is invalid:'{}'").format(err) + ) + + return data diff --git a/pulp_deb/app/tasks/__init__.py b/pulp_deb/app/tasks/__init__.py index 2a1dd8e4f..e18e3af50 100644 --- a/pulp_deb/app/tasks/__init__.py +++ b/pulp_deb/app/tasks/__init__.py @@ -1,3 +1,4 @@ # flake8: noqa from .publishing import publish, publish_verbatim from .synchronizing import synchronize +from .copy import copy_content diff --git a/pulp_deb/app/tasks/copy.py b/pulp_deb/app/tasks/copy.py new file mode 100644 index 000000000..d2f711557 --- /dev/null +++ b/pulp_deb/app/tasks/copy.py @@ -0,0 +1,121 @@ +from django.db import transaction +from django.db.models import Q + +from pulpcore.plugin.models import Content, RepositoryVersion + +from pulp_deb.app.models import AptRepository, Package, ReleaseArchitecture, PackageReleaseComponent + +import logging +from gettext import gettext as _ + +log = logging.getLogger(__name__) + + +def find_structured_publish_content(content, src_repo_version): + """ + Finds the content for structured publish from packages to be copied and returns it all together. 
+ + Args: + content (iterable): Content selected for copying; only the packages in it are kept + src_repo_version (pulpcore.models.RepositoryVersion): Source repo version + + Returns: Queryset of Content objects that extends initial set of content for structured publish + """ + # Content in the source repository version + package_release_component_ids = src_repo_version.content.filter( + pulp_type=PackageReleaseComponent.get_pulp_type() + ).only("pk") + architecture_ids = src_repo_version.content.filter( + pulp_type=ReleaseArchitecture.get_pulp_type() + ).only("pk") + package_release_components = PackageReleaseComponent.objects.filter( + pk__in=package_release_component_ids + ) + + structured_publish_content = set() + + # Packages to be copied + packages = Package.objects.filter(pk__in=content) + structured_publish_content.update(packages.values_list("pk", flat=True)) + + if len(content) != len(packages): + log.warning(_("Additional data with packages is provided. Removing from the content list.")) + + # List of all architectures + architectures = ReleaseArchitecture.objects.filter(pk__in=architecture_ids).values_list( + "pk", flat=True + ) + structured_publish_content.update(architectures) + + # Package release components, release components, release to be copied based on packages. + # The package filter runs in the database rather than as a per-row membership test. + for pckg in package_release_components.filter(package__in=packages).iterator(): + structured_publish_content.update( + [pckg.pk, pckg.release_component.pk, pckg.release_component.release.pk] + ) + + return Content.objects.filter(pk__in=structured_publish_content) + + +@transaction.atomic +def copy_content(config, structured, dependency_solving): + """ + Copy content from one repo to another. 
+ + Args: + config: a list of dicts, each holding the "source_repo_version" and "dest_repo" primary + keys, and optionally a "dest_base_version" primary key and a list of "content" pks + structured: if True, the content to copy is replaced by what + find_structured_publish_content returns for it 
+ dependency_solving: must be False; a truthy value raises NotImplementedError + """ + + def process_entry(entry): + source_repo_version = RepositoryVersion.objects.get(pk=entry["source_repo_version"]) + dest_repo = AptRepository.objects.get(pk=entry["dest_repo"]) + + dest_version_provided = bool(entry.get("dest_base_version")) + if dest_version_provided: + dest_repo_version = RepositoryVersion.objects.get(pk=entry["dest_base_version"]) + else: + dest_repo_version = dest_repo.latest_version() + + if entry.get("content") is not None: + content_filter = Q(pk__in=entry.get("content")) + else: + content_filter = Q() + + log.info(_("Copying: {copy} created").format(copy=content_filter)) + + return ( + source_repo_version, + dest_repo_version, + dest_repo, + content_filter, + dest_version_provided, + ) + + if not dependency_solving: + # No Dependency Solving Branch + # ============================ + for entry in config: + ( + source_repo_version, + dest_repo_version, + dest_repo, + content_filter, + dest_version_provided, + ) = process_entry(entry) + + content_to_copy = source_repo_version.content.filter(content_filter) + if structured: + content_to_copy = find_structured_publish_content( + content_to_copy, source_repo_version + ) + + base_version = dest_repo_version if dest_version_provided else None + + with dest_repo.new_version(base_version=base_version) as new_version: + new_version.add_content(content_to_copy) + else: + raise NotImplementedError("Advanced copy with dependency solving is not yet implemented.") diff --git a/pulp_deb/app/urls.py b/pulp_deb/app/urls.py new file mode 100644 index 000000000..5aed9ce68 --- /dev/null +++ b/pulp_deb/app/urls.py @@ -0,0 +1,5 @@ +from django.urls import path + +from .viewsets import CopyViewSet + +urlpatterns = [path("pulp/api/v3/deb/copy/", CopyViewSet.as_view({"post": "create"}))] diff --git a/pulp_deb/app/viewsets/__init__.py b/pulp_deb/app/viewsets/__init__.py index 8d87c66ab..b65a9a325 100644 --- 
a/pulp_deb/app/viewsets/__init__.py +++ b/pulp_deb/app/viewsets/__init__.py @@ -17,4 +17,4 @@ from .remote import AptRemoteViewSet -from .repository import AptRepositoryVersionViewSet, AptRepositoryViewSet +from .repository import AptRepositoryVersionViewSet, AptRepositoryViewSet, CopyViewSet diff --git a/pulp_deb/app/viewsets/repository.py b/pulp_deb/app/viewsets/repository.py index f2e885dc5..c18b86a26 100644 --- a/pulp_deb/app/viewsets/repository.py +++ b/pulp_deb/app/viewsets/repository.py @@ -2,17 +2,21 @@ from drf_spectacular.utils import extend_schema from rest_framework.decorators import action +from rest_framework import viewsets +from rest_framework.serializers import ValidationError as DRFValidationError from pulpcore.plugin.actions import ModifyRepositoryActionMixin from pulpcore.plugin.serializers import ( AsyncOperationResponseSerializer, RepositorySyncURLSerializer, ) +from pulpcore.plugin.models import RepositoryVersion from pulpcore.plugin.tasking import dispatch from pulpcore.plugin.viewsets import ( OperationPostponedResponse, RepositoryVersionViewSet, RepositoryViewSet, + NamedModelViewSet, ) from pulp_deb.app import models, serializers, tasks @@ -76,3 +80,83 @@ class AptRepositoryVersionViewSet(RepositoryVersionViewSet): """ parent_viewset = AptRepositoryViewSet + + +class CopyViewSet(viewsets.ViewSet): + """ + ViewSet for the content copy API endpoint. 
+ """ + + serializer_class = serializers.CopySerializer + + @extend_schema( + description="Trigger an asynchronous task to copy APT content " + "from one repository into another, creating a new " + "repository version.", + summary="Copy content", + operation_id="copy_content", + request=serializers.CopySerializer, + responses={202: AsyncOperationResponseSerializer}, + ) + def create(self, request): + """Validate the request data and dispatch the copy_content task for it.""" + serializer = serializers.CopySerializer(data=request.data, context={"request": request}) + serializer.is_valid(raise_exception=True) + + config = serializer.validated_data["config"] + structured = serializer.validated_data["structured"] + dependency_solving = serializer.validated_data["dependency_solving"] + + config, shared_repos, exclusive_repos = self._process_config(config) + + async_result = dispatch( + tasks.copy_content, + shared_resources=shared_repos, + exclusive_resources=exclusive_repos, + args=[config, structured, dependency_solving], + kwargs={}, + ) + return OperationPostponedResponse(async_result, request) + + def _process_config(self, config): + """ + Change the hrefs into pks within config. + This method also implicitly validates that the hrefs map to objects. It returns the + new config along with the shared (source) and exclusive (destination) repos to lock on. + """ + result = [] + # exclusive use of the destination repos is needed since new repository versions are being + # created, but source repos can be accessed in a read-only fashion in parallel, so long + # as there are no simultaneous modifications. 
+ shared_repos = [] + exclusive_repos = [] + + for entry in config: + r = {} + source_version = NamedModelViewSet().get_resource( + entry["source_repo_version"], RepositoryVersion + ) + dest_repo = NamedModelViewSet().get_resource(entry["dest_repo"], models.AptRepository) + r["source_repo_version"] = source_version.pk + r["dest_repo"] = dest_repo.pk + shared_repos.append(source_version.repository) + exclusive_repos.append(dest_repo) + + if "dest_base_version" in entry: + try: + r["dest_base_version"] = dest_repo.versions.get( + number=entry["dest_base_version"] + ).pk + except RepositoryVersion.DoesNotExist: + message = _( + "Version {version} does not exist for repository '{repo}'." + ).format(version=entry["dest_base_version"], repo=dest_repo.name) + raise DRFValidationError(detail=message) + + if entry.get("content") is not None: + r["content"] = [] + for c in entry["content"]: + r["content"].append(NamedModelViewSet().extract_pk(c)) + result.append(r) + + return result, shared_repos, exclusive_repos