From 971ed19811efe0060f270b1b86bd041da4ff1a2e Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Wed, 26 Jul 2023 08:51:08 -0400 Subject: [PATCH] Fix some problems with archive-only behavior (#3497) * Fix some problems with archive-only behavior PBENCH-1215 Webb pointed out a broken functional test in #3496. Once it was testing what I had wanted it to test, I realized that it wasn't quite working. This contains some server and functional test changes to make archive-only dataset queries work correctly and to be sure that's adequately tested in the functional test cases. --- .../api/resources/query_apis/__init__.py | 2 ++ .../resources/query_apis/datasets/__init__.py | 9 +++-- .../test/functional/server/test_datasets.py | 36 ++++++++++--------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/lib/pbench/server/api/resources/query_apis/__init__.py b/lib/pbench/server/api/resources/query_apis/__init__.py index 09d480d421..a72c2c083d 100644 --- a/lib/pbench/server/api/resources/query_apis/__init__.py +++ b/lib/pbench/server/api/resources/query_apis/__init__.py @@ -398,6 +398,8 @@ def _call(self, method: Callable, params: ApiParams, context: ApiContext): es_request.get("kwargs").get("json"), ) except Exception as e: + if isinstance(e, APIAbort): + raise raise APIInternalError("Elasticsearch assembly error") from e try: diff --git a/lib/pbench/server/api/resources/query_apis/datasets/__init__.py b/lib/pbench/server/api/resources/query_apis/datasets/__init__.py index 4258a29c0f..19cb264af0 100644 --- a/lib/pbench/server/api/resources/query_apis/datasets/__init__.py +++ b/lib/pbench/server/api/resources/query_apis/datasets/__init__.py @@ -118,11 +118,16 @@ def get_index(self, dataset: Dataset, root_index_name: AnyStr) -> AnyStr: """Retrieve the list of ES indices from the metadata table based on a given root_index_name. """ + + # Datasets marked "archiveonly" aren't indexed, and can't be referenced + # in APIs that rely on Elasticsearch. Instead, we'll raise a CONFLICT + # error. + if Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE): + raise APIAbort(HTTPStatus.CONFLICT, "Dataset indexing was disabled") + try: index_map = Metadata.getvalue(dataset=dataset, key=Metadata.INDEX_MAP) except MetadataError as exc: - if Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE): - raise APIAbort(HTTPStatus.CONFLICT, "Dataset is not indexed") raise APIInternalError( f"Required metadata {Metadata.INDEX_MAP} missing" ) from exc diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index be468ed169..cf7a7cec7d 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -32,6 +32,10 @@ class Tarball: access: str +def all_tarballs() -> list[Path]: + return list(TARBALL_DIR.glob("*.tar.xz")) + list(SPECIAL_DIR.glob("*.tar.xz")) + + class TestPut: """Test success and failure cases of PUT dataset upload""" @@ -198,12 +202,6 @@ def test_archive_only(server_client: PbenchServerClient, login_user): assert operations["UPLOAD"]["state"] == "OK" assert "INDEX" not in operations - # Delete it so we can run the test case again without manual cleanup - response = server_client.remove(md5) - assert ( - response.ok - ), f"delete failed with {response.status_code}, {response.text}" - @staticmethod def test_no_metadata(server_client: PbenchServerClient, login_user): """Test handling for a tarball without a metadata.log. @@ -248,12 +246,6 @@ def test_no_metadata(server_client: PbenchServerClient, login_user): assert operations["UPLOAD"]["state"] == "OK" assert "INDEX" not in operations - # Delete it so we can run the test case again without manual cleanup - response = server_client.remove(md5) - assert ( - response.ok - ), f"delete failed with {response.status_code}, {response.text}" - @staticmethod def check_indexed(server_client: PbenchServerClient, datasets): indexed = [] @@ -402,7 +394,9 @@ def test_details(self, server_client: PbenchServerClient, login_user): == detail["runMetadata"]["script"] ) else: - assert response.status_code == HTTPStatus.CONFLICT + assert ( + response.status_code == HTTPStatus.CONFLICT + ), f"Unexpected {response.json()['message']}" print(f"\t\t... {d.name} is archiveonly") @@ -439,7 +433,7 @@ def test_list_api_key(self, server_client: PbenchServerClient, login_user): expected = [ {"resource_id": Dataset.md5(f), "name": Dataset.stem(f), "metadata": {}} - for f in TARBALL_DIR.glob("*.tar.xz") + for f in all_tarballs() ] expected.sort(key=lambda e: e["resource_id"]) actual = [d.json for d in datasets] @@ -583,6 +577,8 @@ def test_contents(self, server_client: PbenchServerClient, login_user): metadata=["server.archiveonly"], ) + with_toc = False + without_toc = False for dataset in datasets: response = server_client.get( API.DATASETS_CONTENTS, @@ -591,9 +587,14 @@ def test_contents(self, server_client: PbenchServerClient, login_user): ) archive = dataset.metadata["server.archiveonly"] if archive: - assert response.status_code == HTTPStatus.CONFLICT - return + assert ( + response.status_code == HTTPStatus.CONFLICT + ), f"Unexpected {response.json()['message']}" + assert response.json()["message"] == "Dataset indexing was disabled" + without_toc = True + continue + with_toc = True assert ( response.ok ), f"CONTENTS {dataset.name} failed {response.status_code}:{response.json()['message']}" @@ -625,6 +626,7 @@ def test_contents(self, server_client: PbenchServerClient, login_user): {"dataset": dataset.resource_id, "target": f["name"]}, ) assert f["uri"] == uri, f"{f['name']} uri is incorrect: {f['uri']}" + assert with_toc and without_toc, "expected archiveonly and indexed datasets" @pytest.mark.dependency(name="visualize", depends=["upload"], scope="session") def test_visualize(self, server_client: PbenchServerClient, login_user): @@ -778,7 +780,7 @@ def test_delete_all(self, server_client: PbenchServerClient, login_user): f"Unexpected HTTP error, url = {exc.response.url}, status code" f" = {exc.response.status_code}, text = {exc.response.text!r}" ) - for t in TARBALL_DIR.glob("*.tar.xz"): + for t in all_tarballs(): resource_id = datasets_hash.get(t.name) assert resource_id, f"Expected to find tar ball {t.name} to delete" response = server_client.remove(resource_id)