From 971ed19811efe0060f270b1b86bd041da4ff1a2e Mon Sep 17 00:00:00 2001
From: David Butenhof <dbutenho@redhat.com>
Date: Wed, 26 Jul 2023 08:51:08 -0400
Subject: [PATCH] Fix some problems with archive-only behavior  (#3497)

* Fix some problems with archive-only behavior

PBENCH-1215

Webb pointed out a broken functional test in #3496. Once it was testing what I
had wanted it to test, I realized that it wasn't quite working. This contains
some server and functional test changes to make archive-only dataset queries
work correctly and to be sure that's adequately tested in the functional test
cases.
---
 .../api/resources/query_apis/__init__.py      |  2 ++
 .../resources/query_apis/datasets/__init__.py |  9 +++--
 .../test/functional/server/test_datasets.py   | 36 ++++++++++---------
 3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/lib/pbench/server/api/resources/query_apis/__init__.py b/lib/pbench/server/api/resources/query_apis/__init__.py
index 09d480d421..a72c2c083d 100644
--- a/lib/pbench/server/api/resources/query_apis/__init__.py
+++ b/lib/pbench/server/api/resources/query_apis/__init__.py
@@ -398,6 +398,8 @@ def _call(self, method: Callable, params: ApiParams, context: ApiContext):
                 es_request.get("kwargs").get("json"),
             )
         except Exception as e:
+            if isinstance(e, APIAbort):
+                raise
             raise APIInternalError("Elasticsearch assembly error") from e
 
         try:
diff --git a/lib/pbench/server/api/resources/query_apis/datasets/__init__.py b/lib/pbench/server/api/resources/query_apis/datasets/__init__.py
index 4258a29c0f..19cb264af0 100644
--- a/lib/pbench/server/api/resources/query_apis/datasets/__init__.py
+++ b/lib/pbench/server/api/resources/query_apis/datasets/__init__.py
@@ -118,11 +118,16 @@ def get_index(self, dataset: Dataset, root_index_name: AnyStr) -> AnyStr:
         """Retrieve the list of ES indices from the metadata table based on a
         given root_index_name.
         """
+
+        # Datasets marked "archiveonly" aren't indexed, and can't be referenced
+        # in APIs that rely on Elasticsearch. Instead, we'll raise a CONFLICT
+        # error.
+        if Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE):
+            raise APIAbort(HTTPStatus.CONFLICT, "Dataset indexing was disabled")
+
         try:
             index_map = Metadata.getvalue(dataset=dataset, key=Metadata.INDEX_MAP)
         except MetadataError as exc:
-            if Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE):
-                raise APIAbort(HTTPStatus.CONFLICT, "Dataset is not indexed")
             raise APIInternalError(
                 f"Required metadata {Metadata.INDEX_MAP} missing"
             ) from exc
diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py
index be468ed169..cf7a7cec7d 100644
--- a/lib/pbench/test/functional/server/test_datasets.py
+++ b/lib/pbench/test/functional/server/test_datasets.py
@@ -32,6 +32,10 @@ class Tarball:
     access: str
 
 
+def all_tarballs() -> list[Path]:
+    return list(TARBALL_DIR.glob("*.tar.xz")) + list(SPECIAL_DIR.glob("*.tar.xz"))
+
+
 class TestPut:
     """Test success and failure cases of PUT dataset upload"""
 
@@ -198,12 +202,6 @@ def test_archive_only(server_client: PbenchServerClient, login_user):
         assert operations["UPLOAD"]["state"] == "OK"
         assert "INDEX" not in operations
 
-        # Delete it so we can run the test case again without manual cleanup
-        response = server_client.remove(md5)
-        assert (
-            response.ok
-        ), f"delete failed with {response.status_code}, {response.text}"
-
     @staticmethod
     def test_no_metadata(server_client: PbenchServerClient, login_user):
         """Test handling for a tarball without a metadata.log.
@@ -248,12 +246,6 @@ def test_no_metadata(server_client: PbenchServerClient, login_user):
         assert operations["UPLOAD"]["state"] == "OK"
         assert "INDEX" not in operations
 
-        # Delete it so we can run the test case again without manual cleanup
-        response = server_client.remove(md5)
-        assert (
-            response.ok
-        ), f"delete failed with {response.status_code}, {response.text}"
-
     @staticmethod
     def check_indexed(server_client: PbenchServerClient, datasets):
         indexed = []
@@ -402,7 +394,9 @@ def test_details(self, server_client: PbenchServerClient, login_user):
                     == detail["runMetadata"]["script"]
                 )
             else:
-                assert response.status_code == HTTPStatus.CONFLICT
+                assert (
+                    response.status_code == HTTPStatus.CONFLICT
+                ), f"Unexpected {response.json()['message']}"
                 print(f"\t\t... {d.name} is archiveonly")
 
 
@@ -439,7 +433,7 @@ def test_list_api_key(self, server_client: PbenchServerClient, login_user):
 
         expected = [
             {"resource_id": Dataset.md5(f), "name": Dataset.stem(f), "metadata": {}}
-            for f in TARBALL_DIR.glob("*.tar.xz")
+            for f in all_tarballs()
         ]
         expected.sort(key=lambda e: e["resource_id"])
         actual = [d.json for d in datasets]
@@ -583,6 +577,8 @@ def test_contents(self, server_client: PbenchServerClient, login_user):
             metadata=["server.archiveonly"],
         )
 
+        with_toc = False
+        without_toc = False
         for dataset in datasets:
             response = server_client.get(
                 API.DATASETS_CONTENTS,
@@ -591,9 +587,14 @@ def test_contents(self, server_client: PbenchServerClient, login_user):
             )
             archive = dataset.metadata["server.archiveonly"]
             if archive:
-                assert response.status_code == HTTPStatus.CONFLICT
-                return
+                assert (
+                    response.status_code == HTTPStatus.CONFLICT
+                ), f"Unexpected {response.json()['message']}"
+                assert response.json()["message"] == "Dataset indexing was disabled"
+                without_toc = True
+                continue
 
+            with_toc = True
             assert (
                 response.ok
             ), f"CONTENTS {dataset.name} failed {response.status_code}:{response.json()['message']}"
@@ -625,6 +626,7 @@ def test_contents(self, server_client: PbenchServerClient, login_user):
                     {"dataset": dataset.resource_id, "target": f["name"]},
                 )
                 assert f["uri"] == uri, f"{f['name']} uri is incorrect: {f['uri']}"
+        assert with_toc and without_toc, "expected archiveonly and indexed datasets"
 
     @pytest.mark.dependency(name="visualize", depends=["upload"], scope="session")
     def test_visualize(self, server_client: PbenchServerClient, login_user):
@@ -778,7 +780,7 @@ def test_delete_all(self, server_client: PbenchServerClient, login_user):
                 f"Unexpected HTTP error, url = {exc.response.url}, status code"
                 f" = {exc.response.status_code}, text = {exc.response.text!r}"
             )
-        for t in TARBALL_DIR.glob("*.tar.xz"):
+        for t in all_tarballs():
             resource_id = datasets_hash.get(t.name)
             assert resource_id, f"Expected to find tar ball {t.name} to delete"
             response = server_client.remove(resource_id)