Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PULP-76] Remove is highest #2039

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES/1052.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CollectionVersion global uniqueness constraint is now its sha256 digest. Repository level uniqueness
is still (namespace, name, version).
7 changes: 7 additions & 0 deletions CHANGES/1052.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Added the final migration to make the sha256 of the collection version artifact the uniqueness
constraint. This allows users to serve their own interpretation of the content in their private
repositories.
The migration will only succeed if all the content has been adjusted. To account for content that
was not migrated by the migration shipped with 0.22.0, you can run the content repair command
``datarepair-ansible-collection-sha256`` prior to upgrading.
This version removed the content repair command.
1 change: 1 addition & 0 deletions CHANGES/1550.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove the `is_highest` field from CollectionVersion table.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.2.14 on 2022-07-21 23:05

from django.db import migrations, models
from pulpcore.plugin.migrations import RequireVersion


class Migration(migrations.Migration):

dependencies = [
('ansible', '0057_collectionversion_sha256_migrate'),
('core', '0110_apiappstatus'),
]

operations = [
RequireVersion("ansible", "0.23.0"),
migrations.AlterField(
model_name='collectionversion',
name='sha256',
field=models.CharField(db_index=True, max_length=64, null=False),
),
migrations.AlterUniqueTogether(
name='collectionversion',
unique_together={('sha256',)},
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.16 on 2024-11-21 08:53

from django.db import migrations
from pulpcore.plugin.migrations import RequireVersion


class Migration(migrations.Migration):

dependencies = [
("ansible", "0058_collectionversion_unique_sha256"),
]

operations = [
RequireVersion("ansible", "0.23.0"),
migrations.RemoveConstraint(
model_name="collectionversion",
name="unique_is_highest",
),
migrations.RemoveField(
model_name="collectionversion",
name="is_highest",
),
]
28 changes: 3 additions & 25 deletions pulp_ansible/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from django.conf import settings
from django.db import models
from django.db.models import UniqueConstraint, Q
from django.db.utils import IntegrityError
from django.contrib.postgres import fields as psql_fields
from django.contrib.postgres import search as psql_search
Expand Down Expand Up @@ -150,10 +149,6 @@ class CollectionVersion(Content):
repository (models.CharField): The URL of the originating SCM repository.
version (models.CharField): The version of the collection.
requires_ansible (models.CharField): The version of Ansible required to use the collection.
is_highest (models.BooleanField): Indicates that the version is the highest one
in the collection. Import and sync workflows update this field, which then
triggers the database to [re]build the search_vector.
This field is Deprecated and scheduled for removal as soon as 0.24.0.

Relations:

Expand All @@ -180,17 +175,14 @@ class CollectionVersion(Content):
namespace = models.CharField(max_length=64, editable=False)
repository = models.CharField(default="", blank=True, max_length=2000, editable=False)
requires_ansible = models.CharField(null=True, max_length=255)
sha256 = models.CharField(max_length=64, null=True, blank=False)
sha256 = models.CharField(max_length=64, db_index=True, null=False, blank=False)

version = models.CharField(max_length=128, db_collation="pulp_ansible_semver")
version_major = models.IntegerField()
version_minor = models.IntegerField()
version_patch = models.IntegerField()
version_prerelease = models.CharField(max_length=128)

# This field is deprecated. We keep it for some releases for 0-Downtime upgrades.
is_highest = models.BooleanField(editable=False, default=False)

# Foreign Key Fields
collection = models.ForeignKey(
Collection, on_delete=models.PROTECT, related_name="versions", editable=False
Expand All @@ -202,16 +194,9 @@ class CollectionVersion(Content):
# a migration file. The trigger only runs when the table is
# updated. CollectionVersions are INSERT'ed into the table, so
# the search_vector does not get populated at initial creation
# time. In the import or sync workflows, is_highest gets toggled
# back and forth, which causes an UPDATE operation and then the
# search_vector is built.
# time.
search_vector = psql_search.SearchVectorField(default="")
Comment on lines 194 to 198
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering about this search vector.


@classmethod
def natural_key_fields(cls):
# This method should be removed when the uniqueness is properly switched to sha256.
return cls._meta.unique_together[0]

@hook(BEFORE_SAVE)
def calculate_version_parts(self):
v = Version(self.version)
Expand All @@ -236,14 +221,7 @@ def __str__(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = (("namespace", "name", "version"), ("sha256",))
constraints = [
UniqueConstraint(
fields=("collection", "is_highest"),
name="unique_is_highest",
condition=Q(is_highest=True),
)
]
unique_together = ("sha256",)


class CollectionVersionMark(Content):
Expand Down
24 changes: 3 additions & 21 deletions pulp_ansible/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,10 +505,9 @@ def deferred_validate(self, data):
# Call super to ensure that data contains artifact
data = super().deferred_validate(data)
artifact = data.get("artifact")

if (sha256 := data.get("sha256")) and sha256 != artifact.sha256:
raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256"))
else:
data["sha256"] = artifact.sha256

collection_info = process_collection_artifact(
artifact=artifact,
Expand All @@ -519,35 +518,18 @@ def deferred_validate(self, data):
# repository field clashes
collection_info["origin_repository"] = collection_info.pop("repository", None)
data.update(collection_info)
# `retrieve` needs artifact, but it won't be in validated_data because of `get_artifacts`
self.context["artifact"] = artifact

return data

def retrieve(self, validated_data):
"""Reuse existing CollectionVersion if provided artifact matches."""
namespace = validated_data["namespace"]
name = validated_data["name"]
version = validated_data["version"]
artifact = self.context["artifact"]
# TODO switch this check to use digest when ColVersion uniqueness constraint is changed
col = CollectionVersion.objects.filter(
namespace=namespace, name=name, version=version
).first()
if col:
if col._artifacts.get() != artifact:
raise ValidationError(
_("Collection {}.{}-{} already exists with a different artifact").format(
namespace, name, version
)
)

return col
return CollectionVersion.objects.filter(sha256=validated_data["sha256"]).first()

def create(self, validated_data):
"""Final step in creating the CollectionVersion."""
tags = validated_data.pop("tags")
origin_repository = validated_data.pop("origin_repository")

# Create CollectionVersion from its metadata and adds to repository if specified
content = super().create(validated_data)

Expand Down
8 changes: 3 additions & 5 deletions pulp_ansible/app/tasks/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ async def declarative_content_from_git_repo(remote, url, git_ref=None, metadata_
version = metadata["metadata"]["version"]
else:
raise e
collection_version = await sync_to_async(CollectionVersion.objects.get)(
collection_version = await CollectionVersion.objects.aget(
namespace=namespace, name=name, version=version
)
if artifact is None:
Expand Down Expand Up @@ -275,7 +275,7 @@ def import_collection(
temp_file = PulpTemporaryFile.objects.get(pk=temp_file_pk)
filename = CollectionFilename(expected_namespace, expected_name, expected_version)
log.info(f"Processing collection from {temp_file.file.name}")
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection")
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collections.import_collection")

try:
with temp_file.file.open() as artifact_file:
Expand Down Expand Up @@ -673,9 +673,7 @@ async def _add_namespace(self, name, namespace_sha):
"""Adds A Namespace metadata content to the pipeline."""

try:
ns = await sync_to_async(AnsibleNamespaceMetadata.objects.get)(
metadata_sha256=namespace_sha
)
ns = await AnsibleNamespaceMetadata.objects.aget(metadata_sha256=namespace_sha)
await self.put(DeclarativeContent(ns))
return True
except AnsibleNamespaceMetadata.DoesNotExist:
Expand Down
4 changes: 3 additions & 1 deletion pulp_ansible/app/tasks/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def process_collection_artifact(artifact, namespace, name, version):

# Set up logging for CollectionImport object
CollectionImport.objects.get_or_create(task_id=Task.current().pulp_id)
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection")
user_facing_logger = logging.getLogger(
"pulp_ansible.app.tasks.upload.process_collection_artifact"
)

artifact_url = reverse("artifacts-detail", args=[artifact.pk])
filename = CollectionFilename(namespace, name, version)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tests related to sync ansible plugin collection content type."""

import hashlib
from pathlib import Path

import pytest

from pulp_ansible.tests.functional.utils import randstr
Expand Down Expand Up @@ -99,3 +102,32 @@ def test_content_unit_lifecycle(ansible_bindings, build_and_upload_collection, m
ansible_bindings.ContentCollectionVersionsApi.create(file=collection_artifact.filename).task
)
assert content_unit_href in create_task.created_resources


@pytest.mark.parallel
def test_duplicate_collection_key(
ansible_bindings,
ansible_repo_factory,
build_and_upload_collection,
monitor_task,
):
"""Create two content units with the same name but different artifacts."""
repository1 = ansible_repo_factory()
repository2 = ansible_repo_factory()

attrs = {"namespace": randstr(), "name": "squeezer", "version": "0.0.9"}
collection_artifact1, content_unit_href1 = build_and_upload_collection(
repository1, config=attrs
)
collection_artifact2, content_unit_href2 = build_and_upload_collection(
repository2, config=attrs
)

sha256_1 = hashlib.sha256(Path(collection_artifact1.filename).read_bytes()).hexdigest()
sha256_2 = hashlib.sha256(Path(collection_artifact2.filename).read_bytes()).hexdigest()
assert sha256_1 != sha256_2

content_unit_1 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href1)
assert content_unit_1.sha256 == sha256_1
content_unit_2 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href2)
assert content_unit_2.sha256 == sha256_2
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ def test_collection_version_sha256_migrate(migrate):

cv = CollectionVersion.objects.get(pk=cv.pk)
assert cv.sha256 == "SENTINEL"

apps = migrate([("ansible", "0058_collectionversion_unique_sha256")])
CollectionVersion = apps.get_model("ansible", "CollectionVersion")

cv = CollectionVersion.objects.get(pk=cv.pk)
assert cv.sha256 == "SENTINEL"