From 91debdaa7c4216a090cd443843e59faa43898190 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Thu, 6 Jul 2023 10:35:44 -0400
Subject: [PATCH] Rebase and clean up

Improve the `contents` functional test to verify the URI values. Use the
`location` response header to return the tarball URI on upload, and verify
that.
---
 .../server/api/resources/intake_base.py      | 22 ++++---
 .../query_apis/datasets/datasets_contents.py |  5 +-
 .../test/functional/server/test_datasets.py  | 63 +++++++++++++++----
 lib/pbench/test/unit/server/test_relay.py    |  8 +--
 lib/pbench/test/unit/server/test_upload.py   | 35 ++++++++---
 5 files changed, 97 insertions(+), 36 deletions(-)

diff --git a/lib/pbench/server/api/resources/intake_base.py b/lib/pbench/server/api/resources/intake_base.py
index 8889f6b8eb..12642fb2f1 100644
--- a/lib/pbench/server/api/resources/intake_base.py
+++ b/lib/pbench/server/api/resources/intake_base.py
@@ -198,6 +198,9 @@ def _intake(
         username: Optional[str] = None
         tmp_dir: Optional[Path] = None
 
+        prefix = current_app.server_config.rest_uri
+        origin = f"{self._get_uri_base(request).host}{prefix}/datasets/"
+
         try:
             try:
                 authorized_user = Auth.token_auth.current_user()
@@ -284,7 +287,14 @@ def _intake(
                         f"Duplicate dataset {intake.md5!r} ({dataset_name!r}) is missing"
                     ) from e
                 else:
-                    response = jsonify(dict(message="Dataset already exists"))
+                    response = jsonify(
+                        {
+                            "message": "Dataset already exists",
+                            "name": dataset_name,
+                            "resource_id": intake.md5,
+                        }
+                    )
+                    response.headers["location"] = f"{origin}{intake.md5}/inventory/"
                     response.status_code = HTTPStatus.OK
                     return response
         except APIAbort:
@@ -500,21 +510,13 @@ def _intake(
             except Exception as e:
                 current_app.logger.warning("Error removing {}: {}", tmp_dir, str(e))
 
-        prefix = current_app.server_config.rest_uri
-        origin = (
-            f"{self._get_uri_base(request).host}{prefix}/datasets/{dataset.resource_id}"
-        )
-
         response = jsonify(
             {
                 "message": "File successfully uploaded",
                 "name": dataset.name,
                 "resource_id": dataset.resource_id,
-                "uris": {
-                    "tarball": origin + "/inventory/",
-                    "visualize": origin + "/visualize",
-                },
             }
         )
+        response.headers["location"] = f"{origin}{dataset.resource_id}/inventory/"
         response.status_code = HTTPStatus.CREATED
         return response
diff --git a/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py b/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py
index 6eb303e1ba..c426f1d0bc 100644
--- a/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py
+++ b/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py
@@ -203,6 +203,7 @@ def postprocess(self, es_json: JSON, context: ApiContext) -> JSON:
 
         prefix = current_app.server_config.rest_uri
         origin = f"{self._get_uri_base(request).host}{prefix}/datasets/{resource_id}"
+        path = "" if target == "/" else target
 
         dir_list = []
         file_list = []
@@ -210,12 +211,12 @@ def postprocess(self, es_json: JSON, context: ApiContext) -> JSON:
             if val["_source"]["directory"] == target:
                 # Retrieve files list if present else add an empty list.
                 for f in val["_source"].get("files", []):
-                    f["uri"] = f"{origin}/inventory{target}/{f['name']}"
+                    f["uri"] = f"{origin}/inventory{path}/{f['name']}"
                     file_list.append(f)
             elif val["_source"]["parent"] == target:
                 name = val["_source"]["name"]
                 dir_list.append(
-                    {"name": name, "uri": f"{origin}/contents{target}/{name}"}
+                    {"name": name, "uri": f"{origin}/contents{path}/{name}"}
                 )
 
         return {"directories": dir_list, "files": file_list}
diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py
index 310e56eb32..b113fbf159 100644
--- a/lib/pbench/test/functional/server/test_datasets.py
+++ b/lib/pbench/test/functional/server/test_datasets.py
@@ -72,15 +72,10 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user):
                 "message": "File successfully uploaded",
                 "name": name,
                 "resource_id": md5,
-                "uris": {
-                    "tarball": server_client._uri(
-                        API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
-                    ),
-                    "visualize": server_client._uri(
-                        API.DATASETS_VISUALIZE, {"dataset": md5}
-                    ),
-                },
             }
+            assert response.headers["location"] == server_client._uri(
+                API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
+            )
             print(f"\t... uploaded {name}: {a}")
 
         datasets = server_client.get_list(
@@ -109,10 +104,20 @@ def test_upload_again(self, server_client: PbenchServerClient, login_user):
         but with an OK (200) response instead of CREATED (201)
         """
         duplicate = next(iter(TARBALL_DIR.glob("*.tar.xz")))
+        name = Dataset.stem(duplicate)
+        md5 = Dataset.md5(duplicate)
         response = server_client.upload(duplicate)
         assert (
             response.status_code == HTTPStatus.OK
         ), f"upload returned unexpected status {response.status_code}, {response.text}"
+        assert response.json() == {
+            "message": "Dataset already exists",
+            "name": name,
+            "resource_id": md5,
+        }
+        assert response.headers["location"] == server_client._uri(
+            API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
+        )
 
     @staticmethod
     def test_bad_md5(server_client: PbenchServerClient, login_user):
@@ -149,11 +154,20 @@ def test_archive_only(server_client: PbenchServerClient, login_user):
         validate that it doesn't get enabled for unpacking or indexing.
         """
         tarball = SPECIAL_DIR / "fio_mock_2020.01.19T00.18.06.tar.xz"
+        name = Dataset.stem(tarball)
         md5 = Dataset.md5(tarball)
         response = server_client.upload(tarball, metadata={"server.archiveonly:y"})
         assert (
             response.status_code == HTTPStatus.CREATED
-        ), f"upload {Dataset.stem(tarball)} returned unexpected status {response.status_code}, {response.text}"
+        ), f"upload {name} returned unexpected status {response.status_code}, {response.text}"
+        assert response.json() == {
+            "message": "File successfully uploaded",
+            "name": name,
+            "resource_id": md5,
+        }
+        assert response.headers["location"] == server_client._uri(
+            API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
+        )
         metadata = server_client.get_metadata(
             md5, ["dataset.operations", "server.archiveonly"]
         )
@@ -180,11 +194,20 @@ def test_no_metadata(server_client: PbenchServerClient, login_user):
         validate that it doesn't get enabled for unpacking or indexing.
""" tarball = SPECIAL_DIR / "nometadata.tar.xz" + name = Dataset.stem(tarball) md5 = Dataset.md5(tarball) response = server_client.upload(tarball) assert ( response.status_code == HTTPStatus.CREATED - ), f"upload {Dataset.stem(tarball)} returned unexpected status {response.status_code}, {response.text}" + ), f"upload {name} returned unexpected status {response.status_code}, {response.text}" + assert response.json() == { + "message": "File successfully uploaded", + "name": name, + "resource_id": md5, + } + assert response.headers["location"] == server_client._uri( + API.DATASETS_INVENTORY, {"dataset": md5, "target": ""} + ) metadata = server_client.get_metadata( md5, ["dataset.operations", "dataset.metalog", "server.archiveonly"] ) @@ -545,9 +568,27 @@ def test_contents(self, server_client: PbenchServerClient, login_user): archive = dataset.metadata["server.archiveonly"] assert json["directories"] or json["files"] or archive - # Unless archiveonly, we need a metadata.log + # Even if they're empty, both values must be lists + assert isinstance(json["directories"], list) + assert isinstance(json["files"], list) + + # Unless archiveonly, we need at least a metadata.log assert archive or "metadata.log" in (f["name"] for f in json["files"]) + for d in json["directories"]: + uri = server_client._uri( + API.DATASETS_CONTENTS, + {"dataset": dataset.resource_id, "target": d["name"]}, + ) + assert d["uri"] == uri, f"{d['name']} uri is incorrect: {d['uri']}" + + for f in json["files"]: + uri = server_client._uri( + API.DATASETS_INVENTORY, + {"dataset": dataset.resource_id, "target": f["name"]}, + ) + assert f["uri"] == uri, f"{f['name']} uri is incorrect: {f['uri']}" + @pytest.mark.dependency(name="visualize", depends=["upload"], scope="session") def test_visualize(self, server_client: PbenchServerClient, login_user): """Check that we can generate visualization data from a dataset diff --git a/lib/pbench/test/unit/server/test_relay.py b/lib/pbench/test/unit/server/test_relay.py index b11f423c81..6b49a38518 100644 --- a/lib/pbench/test/unit/server/test_relay.py +++ b/lib/pbench/test/unit/server/test_relay.py @@ -127,11 +127,11 @@ def test_relay(self, client, server_config, pbench_drb_token, tarball): "message": "File successfully uploaded", "name": name, "resource_id": md5, - "uris": { - "tarball": f"https://localhost/api/v1/datasets/{md5}/inventory/", - "visualize": f"https://localhost/api/v1/datasets/{md5}/visualize", - }, } + assert ( + response.headers["location"] + == f"https://localhost/api/v1/datasets/{md5}/inventory/" + ) audit = Audit.query() assert len(audit) == 2 diff --git a/lib/pbench/test/unit/server/test_upload.py b/lib/pbench/test/unit/server/test_upload.py index 61fd23514b..ee0d4d386d 100644 --- a/lib/pbench/test/unit/server/test_upload.py +++ b/lib/pbench/test/unit/server/test_upload.py @@ -424,11 +424,11 @@ def test_upload(self, client, pbench_drb_token, server_config, tarball): "message": "File successfully uploaded", "name": name, "resource_id": md5, - "uris": { - "tarball": f"https://localhost/api/v1/datasets/{md5}/inventory/", - "visualize": f"https://localhost/api/v1/datasets/{md5}/visualize", - }, } + assert ( + response.headers["location"] + == f"https://localhost/api/v1/datasets/{md5}/inventory/" + ) dataset = Dataset.query(resource_id=md5) assert dataset is not None @@ -520,6 +520,15 @@ def test_upload_duplicate(self, client, server_config, pbench_drb_token, tarball ) assert response.status_code == HTTPStatus.CREATED, repr(response) + assert response.json == { + "message": 
"File successfully uploaded", + "name": Dataset.stem(datafile), + "resource_id": md5, + } + assert ( + response.headers["location"] + == f"https://localhost/api/v1/datasets/{md5}/inventory/" + ) # Reset manually since we upload twice in this test TestUpload.cachemanager_created = None @@ -532,7 +541,15 @@ def test_upload_duplicate(self, client, server_config, pbench_drb_token, tarball ) assert response.status_code == HTTPStatus.OK, repr(response) - assert response.json.get("message") == "Dataset already exists" + assert response.json == { + "message": "Dataset already exists", + "name": Dataset.stem(datafile), + "resource_id": md5, + } + assert ( + response.headers["location"] + == f"https://localhost/api/v1/datasets/{md5}/inventory/" + ) # We didn't get far enough to create a CacheManager assert TestUpload.cachemanager_created is None @@ -681,11 +698,11 @@ def test_upload_nometa(self, client, pbench_drb_token, server_config, tarball): "message": "File successfully uploaded", "name": name, "resource_id": md5, - "uris": { - "tarball": f"https://localhost/api/v1/datasets/{md5}/inventory/", - "visualize": f"https://localhost/api/v1/datasets/{md5}/visualize", - }, } + assert ( + response.headers["location"] + == f"https://localhost/api/v1/datasets/{md5}/inventory/" + ) dataset = Dataset.query(resource_id=md5) assert dataset is not None