From 1eebe0fd595f8b07471069b905bd048d939eaf26 Mon Sep 17 00:00:00 2001
From: Siddardh Ramesh <30310381+siddardh-ra@users.noreply.github.com>
Date: Tue, 27 Jun 2023 02:58:52 +0530
Subject: [PATCH] Compare datasets - Integrate Quisby into Pbench Server API (#3470)

PBENCH-1189

---------

Co-authored-by: siddardh
---
 lib/pbench/client/__init__.py                  |   1 +
 lib/pbench/server/api/__init__.py              |   7 +
 .../server/api/resources/datasets_compare.py   | 125 +++++++++++++
 .../test/unit/server/test_datasets_compare.py  | 166 ++++++++++++++++++
 .../unit/server/test_endpoint_configure.py     |   1 +
 5 files changed, 300 insertions(+)
 create mode 100644 lib/pbench/server/api/resources/datasets_compare.py
 create mode 100644 lib/pbench/test/unit/server/test_datasets_compare.py

diff --git a/lib/pbench/client/__init__.py b/lib/pbench/client/__init__.py
index 61686cbbd0..c67b418348 100644
--- a/lib/pbench/client/__init__.py
+++ b/lib/pbench/client/__init__.py
@@ -39,6 +39,7 @@ class API(Enum):
     """
 
     DATASETS = "datasets"
+    DATASETS_COMPARE = "datasets_compare"
     DATASETS_CONTENTS = "datasets_contents"
     DATASETS_DETAIL = "datasets_detail"
     DATASETS_INVENTORY = "datasets_inventory"
diff --git a/lib/pbench/server/api/__init__.py b/lib/pbench/server/api/__init__.py
index 32ebe56065..54ab0ee834 100644
--- a/lib/pbench/server/api/__init__.py
+++ b/lib/pbench/server/api/__init__.py
@@ -14,6 +14,7 @@
 from pbench.common.logger import get_pbench_logger
 from pbench.server import PbenchServerConfig
 from pbench.server.api.resources.api_key import APIKeyManage
+from pbench.server.api.resources.datasets_compare import DatasetsCompare
 from pbench.server.api.resources.datasets_inventory import DatasetsInventory
 from pbench.server.api.resources.datasets_list import DatasetsList
 from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
@@ -63,6 +64,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig):
         endpoint="datasets",
         resource_class_args=(config,),
     )
+    api.add_resource(
+        DatasetsCompare,
+        f"{base_uri}/compare",
+        endpoint="datasets_compare",
+        resource_class_args=(config,),
+    )
     api.add_resource(
         DatasetsContents,
         f"{base_uri}/datasets/<string:dataset>/contents/<path:target>",
diff --git a/lib/pbench/server/api/resources/datasets_compare.py b/lib/pbench/server/api/resources/datasets_compare.py
new file mode 100644
index 0000000000..5f79b03083
--- /dev/null
+++ b/lib/pbench/server/api/resources/datasets_compare.py
@@ -0,0 +1,125 @@
+from http import HTTPStatus
+from urllib.request import Request
+
+from flask import current_app, jsonify
+from flask.wrappers import Response
+from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing
+
+from pbench.server import OperationCode, PbenchServerConfig
+from pbench.server.api.resources import (
+    APIAbort,
+    ApiAuthorization,
+    ApiAuthorizationType,
+    ApiBase,
+    ApiContext,
+    APIInternalError,
+    ApiMethod,
+    ApiParams,
+    ApiSchema,
+    Parameter,
+    ParamType,
+    Schema,
+)
+from pbench.server.cache_manager import (
+    CacheManager,
+    TarballNotFound,
+    TarballUnpackError,
+)
+from pbench.server.database.models.datasets import Metadata
+
+
+class DatasetsCompare(ApiBase):
+    """
+    This class implements the Server API used to retrieve comparison data for visualization.
+ """ + + def __init__(self, config: PbenchServerConfig): + super().__init__( + config, + ApiSchema( + ApiMethod.GET, + OperationCode.READ, + query_schema=Schema( + Parameter( + "datasets", + ParamType.LIST, + element_type=ParamType.DATASET, + string_list=",", + required=True, + ), + ), + authorization=ApiAuthorizationType.NONE, + ), + ) + + def _get( + self, params: ApiParams, request: Request, context: ApiContext + ) -> Response: + """ + This function is using Quisby to compare results into a form that supports visualization + + Args: + params: includes the uri parameters, which provide the list of dataset. + request: Original incoming Request object + context: API context dictionary + + Raises: + UnauthorizedAccess : The user isn't authorized for the requested access. + APIAbort, reporting "NOT_FOUND" and "INTERNAL_SERVER_ERROR" + APIInternalError, reporting the failure message + + GET /api/v1/compare?datasets=d1,d2,d3 + """ + + datasets = params.query.get("datasets") + benchmark_choice = None + for dataset in datasets: + benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script") + # Validate if all the selected datasets is of same benchmark + if not benchmark_choice: + benchmark_choice = benchmark + elif benchmark != benchmark_choice: + raise APIAbort( + HTTPStatus.BAD_REQUEST, + f"Selected dataset benchmarks must match: {benchmark_choice} and {benchmark} cannot be compared.", + ) + + # Validate if the user is authorized to access the selected datasets + self._check_authorization( + ApiAuthorization( + ApiAuthorizationType.USER_ACCESS, + OperationCode.READ, + dataset.owner_id, + dataset.access, + ) + ) + cache_m = CacheManager(self.config, current_app.logger) + stream_file = {} + for dataset in datasets: + try: + tarball = cache_m.find_dataset(dataset.resource_id) + except TarballNotFound as e: + raise APIInternalError( + f"Expected dataset with ID '{dataset.resource_id}' is missing from the cache manager." + ) from e + try: + file = tarball.extract( + tarball.tarball_path, f"{tarball.name}/result.csv" + ) + except TarballUnpackError as e: + raise APIInternalError(str(e)) from e + stream_file[dataset.name] = file + + benchmark_type = BenchmarkName.__members__.get(benchmark.upper()) + if not benchmark_type: + raise APIAbort( + HTTPStatus.UNSUPPORTED_MEDIA_TYPE, f"Unsupported Benchmark: {benchmark}" + ) + get_quisby_data = QuisbyProcessing().compare_csv_to_json( + benchmark_type, InputType.STREAM, stream_file + ) + if get_quisby_data["status"] != "success": + raise APIInternalError( + f"Quisby processing failure. 
+            )
+        return jsonify(get_quisby_data)
diff --git a/lib/pbench/test/unit/server/test_datasets_compare.py b/lib/pbench/test/unit/server/test_datasets_compare.py
new file mode 100644
index 0000000000..0f6dd2ba9d
--- /dev/null
+++ b/lib/pbench/test/unit/server/test_datasets_compare.py
@@ -0,0 +1,166 @@
+from http import HTTPStatus
+from pathlib import Path
+from typing import Optional
+
+from pquisby.lib.post_processing import QuisbyProcessing
+import pytest
+import requests
+
+from pbench.server import JSON
+from pbench.server.cache_manager import CacheManager, TarballUnpackError
+from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata
+from pbench.server.database.models.users import User
+
+
+def mock_get_value(dataset: Dataset, key: str, user: Optional[User] = None) -> str:
+    if dataset.name == "uperf_3" or dataset.name == "uperf_4":
+        return "hammerDB"
+    return "uperf"
+
+
+class TestCompareDatasets:
+    @pytest.fixture()
+    def query_get_as(self, client, server_config, more_datasets, get_token_func):
+        """
+        Helper fixture to perform the API query and validate an expected
+        return status.
+
+        Args:
+            client: Flask test API client fixture
+            server_config: Pbench config fixture
+            more_datasets: Dataset construction fixture
+            get_token_func: Pbench token fixture
+        """
+
+        def query_api(
+            datasets: list, user: str, expected_status: HTTPStatus
+        ) -> requests.Response:
+            ds_list = []
+            for dataset in datasets:
+                try:
+                    dataset_id = Dataset.query(name=dataset).resource_id
+                    ds_list.append(dataset_id)
+                except DatasetNotFound:
+                    ds_list.append(dataset)  # Allow passing deliberately bad value
+            headers = None
+            if user:
+                headers = {"authorization": f"bearer {get_token_func(user)}"}
+            response = client.get(
+                f"{server_config.rest_uri}/compare",
+                query_string={"datasets": ds_list},
+                headers=headers,
+            )
+            assert response.status_code == expected_status
+            return response
+
+        return query_api
+
+    class MockTarball:
+        tarball_path = Path("/dataset/tarball.tar.xz")
+        name = "tarball"
+
+        @staticmethod
+        def extract(_tarball_path: Path, _path: str) -> str:
+            return "CSV_file_as_a_string"
+
+    def mock_find_dataset(self, dataset) -> MockTarball:
+        # Validate the resource_id
+        Dataset.query(resource_id=dataset)
+
+        return self.MockTarball()
+
+    def test_dataset_not_present(self, query_get_as, monkeypatch):
+        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
+
+        query_get_as(["fio_2"], "drb", HTTPStatus.INTERNAL_SERVER_ERROR)
+
+    def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
+        @staticmethod
+        def mock_extract(_tarball_path: Path, _path: str) -> str:
+            return "IncorrectData"
+
+        def mock_compare_csv_to_json(
+            self, benchmark_name, input_type, data_stream
+        ) -> JSON:
+            return {"status": "failed", "exception": "Unsupported Media Type"}
+
+        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
+        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
+        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
+        monkeypatch.setattr(
+            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
+        )
+        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)
+
+    def test_tarball_unpack_exception(self, query_get_as, monkeypatch):
+        @staticmethod
+        def mock_extract(_tarball_path: Path, _path: str):
+            raise TarballUnpackError(
+                _tarball_path, f"Testing unpack exception for path {_path}"
+            )
+
+        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
+        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
+        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
+        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)
+
+    @pytest.mark.parametrize(
+        "user,datasets,exp_status,exp_message",
+        (
+            (
+                "drb",
+                ["uperf_1", "nonexistent-dataset"],
+                HTTPStatus.BAD_REQUEST,
+                "Unrecognized list value ['nonexistent-dataset'] given for parameter datasets; expected Dataset",
+            ),
+            (
+                "drb",
+                ["uperf_1", "uperf_2"],
+                HTTPStatus.FORBIDDEN,
+                "User drb is not authorized to READ a resource owned by test with private access",
+            ),
+            (
+                "test",
+                ["uperf_1", "uperf_2"],
+                HTTPStatus.OK,
+                None,
+            ),
+            (
+                None,
+                ["fio_1", "fio_2"],
+                HTTPStatus.OK,
+                None,
+            ),
+            (
+                "test",
+                ["fio_1", "uperf_3"],
+                HTTPStatus.BAD_REQUEST,
+                "Selected dataset benchmarks must match: uperf and hammerDB cannot be compared.",
+            ),
+            (
+                "test",
+                ["uperf_3", "uperf_4"],
+                HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
+                "Unsupported Benchmark: hammerDB",
+            ),
+        ),
+    )
+    def test_datasets_with_different_benchmark(
+        self, user, datasets, exp_status, exp_message, query_get_as, monkeypatch
+    ):
+        def mock_compare_csv_to_json(
+            self, benchmark_name, input_type, data_stream
+        ) -> JSON:
+            return {"status": "success", "json_data": "quisby_data"}
+
+        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
+        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
+        monkeypatch.setattr(
+            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
+        )
+
+        response = query_get_as(datasets, user, exp_status)
+        if exp_status == HTTPStatus.OK:
+            assert response.json["status"] == "success"
+            assert response.json["json_data"] == "quisby_data"
+        else:
+            assert response.json["message"] == exp_message
diff --git a/lib/pbench/test/unit/server/test_endpoint_configure.py b/lib/pbench/test/unit/server/test_endpoint_configure.py
index a1c14cb61d..5fff06cc42 100644
--- a/lib/pbench/test/unit/server/test_endpoint_configure.py
+++ b/lib/pbench/test/unit/server/test_endpoint_configure.py
@@ -59,6 +59,7 @@ def check_config(self, client, server_config, host, my_headers={}):
                     "template": f"{uri}/datasets/{{dataset}}",
                     "params": {"dataset": {"type": "string"}},
                 },
+                "datasets_compare": {"template": f"{uri}/compare", "params": {}},
                 "datasets_contents": {
                     "template": f"{uri}/datasets/{{dataset}}/contents/{{target}}",
                     "params": {
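
As a usage sketch (not part of the patch itself), a client could exercise the new
comparison endpoint roughly as follows once the change is deployed. The server URL
and API token below are placeholders; the /api/v1/compare path, the comma-separated
"datasets" query parameter, and the bearer authorization header mirror the handler
and unit tests above, and the "status"/"json_data" response keys follow the success
shape asserted in the tests.

    # Hypothetical client call against a deployed Pbench Server; the host and
    # token are assumptions, not values taken from this patch.
    import requests

    SERVER = "https://pbench.example.com"  # placeholder server host
    TOKEN = "<api-key>"                    # placeholder bearer token

    response = requests.get(
        f"{SERVER}/api/v1/compare",
        params={"datasets": "d1,d2,d3"},  # comma-separated dataset resource IDs
        headers={"authorization": f"bearer {TOKEN}"},
    )
    response.raise_for_status()
    payload = response.json()
    if payload.get("status") == "success":
        # "json_data" carries the Quisby comparison output used for visualization
        print(payload["json_data"])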