From ab74c2ab102641307af0a8305fceec3bf209d609 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 15 Sep 2021 00:24:24 -0400 Subject: [PATCH] Update remote artifact urls on sync if the remote or repo changes closes: #9395 https://pulp.plan.io/issues/9395 --- CHANGES/9395.bugfix | 1 + pulpcore/plugin/stages/artifact_stages.py | 30 ++++--- .../functional/api/using_plugin/constants.py | 7 ++ .../api/using_plugin/test_content_delivery.py | 79 ++++++++++++++++++- 4 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 CHANGES/9395.bugfix diff --git a/CHANGES/9395.bugfix b/CHANGES/9395.bugfix new file mode 100644 index 0000000000..5fc8f5c9b4 --- /dev/null +++ b/CHANGES/9395.bugfix @@ -0,0 +1 @@ +Fixed an issue where on_demand content might not be downloaded properly if the remote URL was changed (even if re-synced). diff --git a/pulpcore/plugin/stages/artifact_stages.py b/pulpcore/plugin/stages/artifact_stages.py index 42e650b45b..e7e314dc72 100644 --- a/pulpcore/plugin/stages/artifact_stages.py +++ b/pulpcore/plugin/stages/artifact_stages.py @@ -276,13 +276,11 @@ async def run(self): The coroutine for this stage. """ async for batch in self.batches(): - await sync_to_async(RemoteArtifact.objects.bulk_get_or_create)( - await self._needed_remote_artifacts(batch) - ) + await self._handle_remote_artifacts(batch) for d_content in batch: await self.put(d_content) - async def _needed_remote_artifacts(self, batch): + async def _handle_remote_artifacts(self, batch): """ Build a list of only :class:`~pulpcore.plugin.models.RemoteArtifact` that need to be created for the batch. @@ -318,7 +316,8 @@ async def _needed_remote_artifacts(self, batch): # # We can end up with duplicates (diff pks, same sha256) in the sequence below, # so we store by-sha256 and then return the final values - needed_ras = {} # { str(): RemoteArtifact, ... } + ras_to_create = {} # { str(): RemoteArtifact, ... } + ras_to_update = {} for d_content in batch: for d_artifact in d_content.d_artifacts: if not d_artifact.remote: @@ -379,14 +378,25 @@ async def _needed_remote_artifacts(self, batch): async for remote_artifact in sync_to_async_iterable( content_artifact._remote_artifact_saver_ras ): - if remote_artifact.remote_id == d_artifact.remote.pk: + if d_artifact.url == remote_artifact.url: + break + + if d_artifact.remote.pk == remote_artifact.remote_id: + key = f"{content_artifact.pk}-{remote_artifact.remote_id}" + remote_artifact.url = d_artifact.url + ras_to_update[key] = remote_artifact break else: remote_artifact = self._create_remote_artifact(d_artifact, content_artifact) - key = f"{str(content_artifact.pk)}-{str(d_artifact.remote.pk)}" - needed_ras[key] = remote_artifact - - return list(needed_ras.values()) + key = f"{content_artifact.pk}-{d_artifact.remote.pk}" + ras_to_create[key] = remote_artifact + + if ras_to_create: + await sync_to_async(RemoteArtifact.objects.bulk_create)(list(ras_to_create.values())) + if ras_to_update: + await sync_to_async(RemoteArtifact.objects.bulk_update)( + list(ras_to_update.values()), fields=["url"] + ) @staticmethod def _create_remote_artifact(d_artifact, content_artifact): diff --git a/pulpcore/tests/functional/api/using_plugin/constants.py b/pulpcore/tests/functional/api/using_plugin/constants.py index 36a3fb5192..5ccd93d5e6 100644 --- a/pulpcore/tests/functional/api/using_plugin/constants.py +++ b/pulpcore/tests/functional/api/using_plugin/constants.py @@ -47,6 +47,13 @@ FILE_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "file/") """The URL to a file repository.""" +FILE_FIXTURE_WITH_MISSING_FILES_URL = urljoin(PULP_FIXTURES_BASE_URL, "file-manifest/") +"""The URL to a file repository with missing files.""" + +FILE_FIXTURE_WITH_MISSING_FILES_MANIFEST_URL = urljoin( + FILE_FIXTURE_WITH_MISSING_FILES_URL, "PULP_MANIFEST" +) +"""The URL to a file repository with missing files manifest.""" FILE_CHUNKED_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "file-chunked/") """The URL to a file repository.""" diff --git a/pulpcore/tests/functional/api/using_plugin/test_content_delivery.py b/pulpcore/tests/functional/api/using_plugin/test_content_delivery.py index 116c7164fc..5e1b7ac904 100644 --- a/pulpcore/tests/functional/api/using_plugin/test_content_delivery.py +++ b/pulpcore/tests/functional/api/using_plugin/test_content_delivery.py @@ -5,7 +5,7 @@ from urllib.parse import urljoin from pulp_smash import api, config, utils -from pulp_smash.pulp3.bindings import delete_orphans +from pulp_smash.pulp3.bindings import delete_orphans, monitor_task, PulpTestCase from pulp_smash.pulp3.constants import ON_DEMAND_DOWNLOAD_POLICIES from pulp_smash.pulp3.utils import ( download_content_unit, @@ -16,16 +16,26 @@ ) from requests import HTTPError +from pulpcore.client.pulp_file import ( + PublicationsFileApi, + RemotesFileApi, + RepositoriesFileApi, + RepositorySyncURL, + DistributionsFileApi, +) from pulpcore.tests.functional.api.using_plugin.constants import ( FILE_CONTENT_NAME, FILE_DISTRIBUTION_PATH, FILE_FIXTURE_URL, + FILE_FIXTURE_MANIFEST_URL, + FILE_FIXTURE_WITH_MISSING_FILES_MANIFEST_URL, FILE_REMOTE_PATH, FILE_REPO_PATH, ) from pulpcore.tests.functional.api.using_plugin.utils import ( create_file_publication, gen_file_remote, + gen_file_client, ) from pulpcore.tests.functional.api.using_plugin.utils import ( # noqa:F401 set_up_module as setUpModule, @@ -105,3 +115,70 @@ def test_content_remote_delete(self): ).hexdigest() self.assertEqual(pulp_hash, fixtures_hash) + + +class RemoteArtifactUpdateTestCase(PulpTestCase): + @classmethod + def setUpClass(cls): + """Clean out Pulp before testing.""" + delete_orphans() + client = gen_file_client() + cls.repo_api = RepositoriesFileApi(client) + cls.remote_api = RemotesFileApi(client) + cls.publication_api = PublicationsFileApi(client) + cls.distributions_api = DistributionsFileApi(client) + cls.cfg = config.get_config() + + def tearDown(self): + """Clean up Pulp after testing.""" + self.doCleanups() + delete_orphans() + + def test_remote_artifact_url_update(self): + """Test that downloading on_demand content works after a repository layout change.""" + + FILE_NAME = "1.iso" + + # 1. Create a remote, repository and distribution - remote URL has links that should 404 + remote_config = gen_file_remote( + policy="on_demand", url=FILE_FIXTURE_WITH_MISSING_FILES_MANIFEST_URL + ) + remote = self.remote_api.create(remote_config) + self.addCleanup(self.remote_api.delete, remote.pulp_href) + + repo = self.repo_api.create(gen_repo(autopublish=True, remote=remote.pulp_href)) + self.addCleanup(self.repo_api.delete, repo.pulp_href) + + body = gen_distribution(repository=repo.pulp_href) + distribution_response = self.distributions_api.create(body) + created_resources = monitor_task(distribution_response.task).created_resources + distribution = self.distributions_api.read(created_resources[0]) + self.addCleanup(self.distributions_api.delete, distribution.pulp_href) + + # 2. Sync the repository, verify that downloading artifacts fails + repository_sync_data = RepositorySyncURL(remote=remote.pulp_href) + + sync_response = self.repo_api.sync(repo.pulp_href, repository_sync_data) + monitor_task(sync_response.task) + + with self.assertRaises(HTTPError): + download_content_unit(self.cfg, distribution.to_dict(), FILE_NAME) + + # 3. Update the remote URL with one that works, sync again, check that downloading + # artifacts works. + update_response = self.remote_api.update( + remote.pulp_href, gen_file_remote(policy="on_demand", url=FILE_FIXTURE_MANIFEST_URL) + ) + monitor_task(update_response.task) + + sync_response = self.repo_api.sync(repo.pulp_href, repository_sync_data) + monitor_task(sync_response.task) + + content = download_content_unit(self.cfg, distribution.to_dict(), FILE_NAME) + pulp_hash = hashlib.sha256(content).hexdigest() + + fixtures_hash = hashlib.sha256( + utils.http_get(urljoin(FILE_FIXTURE_URL, FILE_NAME)) + ).hexdigest() + + self.assertEqual(pulp_hash, fixtures_hash)