Compare datasets - Integrate Quisby into Pbench Server API (#3470)
PBENCH-1189
---------

Co-authored-by: siddardh <sira@redhat27!>
siddardh-ra and siddardh authored Jun 26, 2023
1 parent 999f797 commit 1eebe0f
Showing 5 changed files with 300 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/pbench/client/__init__.py
@@ -39,6 +39,7 @@ class API(Enum):
"""

DATASETS = "datasets"
DATASETS_COMPARE = "datasets_compare"
DATASETS_CONTENTS = "datasets_contents"
DATASETS_DETAIL = "datasets_detail"
DATASETS_INVENTORY = "datasets_inventory"
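
For reference, the new enum member is the symbolic name client code uses for the endpoint; the server's endpoint configuration (see the test_endpoint_configure change at the end of this diff) maps it to the "{uri}/compare" URI template. A minimal sketch, for illustration only:

    from pbench.client import API

    # The enum value is the endpoint name the server advertises; the server
    # resolves it to the "<rest_uri>/compare" URI template.
    assert API.DATASETS_COMPARE.value == "datasets_compare"
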
7 changes: 7 additions & 0 deletions lib/pbench/server/api/__init__.py
@@ -14,6 +14,7 @@
from pbench.common.logger import get_pbench_logger
from pbench.server import PbenchServerConfig
from pbench.server.api.resources.api_key import APIKeyManage
from pbench.server.api.resources.datasets_compare import DatasetsCompare
from pbench.server.api.resources.datasets_inventory import DatasetsInventory
from pbench.server.api.resources.datasets_list import DatasetsList
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
@@ -63,6 +64,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig):
endpoint="datasets",
resource_class_args=(config,),
)
api.add_resource(
DatasetsCompare,
f"{base_uri}/compare",
endpoint="datasets_compare",
resource_class_args=(config,),
)
api.add_resource(
DatasetsContents,
f"{base_uri}/datasets/<string:dataset>/contents/",
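
With the resource registered, the endpoint answers GET requests at "{base_uri}/compare". A minimal sketch of a direct query using requests; the host name, API key, and dataset resource IDs are placeholders, not part of this commit:

    import requests

    # Compare two datasets by resource ID; private datasets require a bearer token.
    response = requests.get(
        "https://pbench.example.com/api/v1/compare",
        params={"datasets": "d1,d2"},
        headers={"Authorization": "Bearer <api-key>"},
    )
    response.raise_for_status()
    payload = response.json()
    assert payload["status"] == "success"
    comparison = payload["json_data"]  # visualization-ready Quisby output
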
125 changes: 125 additions & 0 deletions lib/pbench/server/api/resources/datasets_compare.py
@@ -0,0 +1,125 @@
from http import HTTPStatus
from urllib.request import Request

from flask import current_app, jsonify
from flask.wrappers import Response
from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
    APIAbort,
    ApiAuthorization,
    ApiAuthorizationType,
    ApiBase,
    ApiContext,
    APIInternalError,
    ApiMethod,
    ApiParams,
    ApiSchema,
    Parameter,
    ParamType,
    Schema,
)
from pbench.server.cache_manager import (
    CacheManager,
    TarballNotFound,
    TarballUnpackError,
)
from pbench.server.database.models.datasets import Metadata


class DatasetsCompare(ApiBase):
    """This class implements the Server API used to retrieve comparison data
    for visualization.
    """

    def __init__(self, config: PbenchServerConfig):
        super().__init__(
            config,
            ApiSchema(
                ApiMethod.GET,
                OperationCode.READ,
                query_schema=Schema(
                    Parameter(
                        "datasets",
                        ParamType.LIST,
                        element_type=ParamType.DATASET,
                        string_list=",",
                        required=True,
                    ),
                ),
                authorization=ApiAuthorizationType.NONE,
            ),
        )

    def _get(
        self, params: ApiParams, request: Request, context: ApiContext
    ) -> Response:
        """Use Quisby to compare the selected datasets into a form that
        supports visualization.

        GET /api/v1/compare?datasets=d1,d2,d3

        Args:
            params: includes the URI parameters, which provide the list of datasets
            request: the original incoming Request object
            context: the API context dictionary

        Raises:
            UnauthorizedAccess: the user isn't authorized for the requested access
            APIAbort, reporting BAD_REQUEST or UNSUPPORTED_MEDIA_TYPE
            APIInternalError, reporting the failure message
        """
        datasets = params.query.get("datasets")
        benchmark_choice = None
        for dataset in datasets:
            benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
            # Validate that all the selected datasets are of the same benchmark
            if not benchmark_choice:
                benchmark_choice = benchmark
            elif benchmark != benchmark_choice:
                raise APIAbort(
                    HTTPStatus.BAD_REQUEST,
                    f"Selected dataset benchmarks must match: {benchmark_choice} and {benchmark} cannot be compared.",
                )

            # Validate that the user is authorized to access the selected datasets
            self._check_authorization(
                ApiAuthorization(
                    ApiAuthorizationType.USER_ACCESS,
                    OperationCode.READ,
                    dataset.owner_id,
                    dataset.access,
                )
            )
        cache_m = CacheManager(self.config, current_app.logger)
        stream_file = {}
        for dataset in datasets:
            try:
                tarball = cache_m.find_dataset(dataset.resource_id)
            except TarballNotFound as e:
                raise APIInternalError(
                    f"Expected dataset with ID '{dataset.resource_id}' is missing from the cache manager."
                ) from e
            try:
                file = tarball.extract(
                    tarball.tarball_path, f"{tarball.name}/result.csv"
                )
            except TarballUnpackError as e:
                raise APIInternalError(str(e)) from e
            stream_file[dataset.name] = file

        benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
        if not benchmark_type:
            raise APIAbort(
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE, f"Unsupported Benchmark: {benchmark}"
            )
        get_quisby_data = QuisbyProcessing().compare_csv_to_json(
            benchmark_type, InputType.STREAM, stream_file
        )
        if get_quisby_data["status"] != "success":
            raise APIInternalError(
                f"Quisby processing failure. Exception: {get_quisby_data['exception']}"
            )
        return jsonify(get_quisby_data)
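
The handler's core transformation is the compare_csv_to_json() call: a mapping of dataset names to result.csv contents goes in, and a JSON document suitable for visualization comes out. A standalone sketch of that call, assuming BenchmarkName.UPERF exists for the uperf benchmark (the handler derives it via BenchmarkName.__members__.get(benchmark.upper())); the CSV strings here are placeholders:

    from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing

    # Map each dataset name to the text of its extracted result.csv file,
    # mirroring the stream_file dict built by DatasetsCompare._get().
    stream_file = {
        "uperf_1": "<contents of uperf_1/result.csv>",
        "uperf_2": "<contents of uperf_2/result.csv>",
    }
    result = QuisbyProcessing().compare_csv_to_json(
        BenchmarkName.UPERF, InputType.STREAM, stream_file
    )
    if result["status"] == "success":
        data = result["json_data"]  # returned to the caller via jsonify()
    else:
        error = result["exception"]  # surfaced as an APIInternalError
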
166 changes: 166 additions & 0 deletions lib/pbench/test/unit/server/test_datasets_compare.py
@@ -0,0 +1,166 @@
from http import HTTPStatus
from pathlib import Path
from typing import Optional

from pquisby.lib.post_processing import QuisbyProcessing
import pytest
import requests

from pbench.server import JSON
from pbench.server.cache_manager import CacheManager, TarballUnpackError
from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata
from pbench.server.database.models.users import User


def mock_get_value(dataset: Dataset, key: str, user: Optional[User] = None) -> str:
    if dataset.name == "uperf_3" or dataset.name == "uperf_4":
        return "hammerDB"
    return "uperf"


class TestCompareDatasets:
    @pytest.fixture()
    def query_get_as(self, client, server_config, more_datasets, get_token_func):
        """Helper fixture to perform the API query and validate an expected
        return status.

        Args:
            client: Flask test API client fixture
            server_config: Pbench config fixture
            more_datasets: Dataset construction fixture
            get_token_func: Pbench token fixture
        """

        def query_api(
            datasets: list, user: str, expected_status: HTTPStatus
        ) -> requests.Response:
            ds_list = []
            for dataset in datasets:
                try:
                    dataset_id = Dataset.query(name=dataset).resource_id
                    ds_list.append(dataset_id)
                except DatasetNotFound:
                    ds_list.append(dataset)  # Allow passing a deliberately bad value
            headers = None
            if user:
                headers = {"authorization": f"bearer {get_token_func(user)}"}
            response = client.get(
                f"{server_config.rest_uri}/compare",
                query_string={"datasets": ds_list},
                headers=headers,
            )
            assert response.status_code == expected_status
            return response

        return query_api

    class MockTarball:
        tarball_path = Path("/dataset/tarball.tar.xz")
        name = "tarball"

        @staticmethod
        def extract(_tarball_path: Path, _path: str) -> str:
            return "CSV_file_as_a_string"

    def mock_find_dataset(self, dataset) -> MockTarball:
        # Validate the resource_id
        Dataset.query(resource_id=dataset)
        return self.MockTarball()

    def test_dataset_not_present(self, query_get_as, monkeypatch):
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)

        query_get_as(["fio_2"], "drb", HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
        @staticmethod
        def mock_extract(_tarball_path: Path, _path: str) -> str:
            return "IncorrectData"

        def mock_compare_csv_to_json(
            self, benchmark_name, input_type, data_stream
        ) -> JSON:
            return {"status": "failed", "exception": "Unsupported Media Type"}

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(
            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
        )
        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_tarball_unpack_exception(self, query_get_as, monkeypatch):
        @staticmethod
        def mock_extract(_tarball_path: Path, _path: str):
            raise TarballUnpackError(
                _tarball_path, f"Testing unpack exception for path {_path}"
            )

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)

    @pytest.mark.parametrize(
        "user,datasets,exp_status,exp_message",
        (
            (
                "drb",
                ["uperf_1", "nonexistent-dataset"],
                HTTPStatus.BAD_REQUEST,
                "Unrecognized list value ['nonexistent-dataset'] given for parameter datasets; expected Dataset",
            ),
            (
                "drb",
                ["uperf_1", "uperf_2"],
                HTTPStatus.FORBIDDEN,
                "User drb is not authorized to READ a resource owned by test with private access",
            ),
            (
                "test",
                ["uperf_1", "uperf_2"],
                HTTPStatus.OK,
                None,
            ),
            (
                None,
                ["fio_1", "fio_2"],
                HTTPStatus.OK,
                None,
            ),
            (
                "test",
                ["fio_1", "uperf_3"],
                HTTPStatus.BAD_REQUEST,
                "Selected dataset benchmarks must match: uperf and hammerDB cannot be compared.",
            ),
            (
                "test",
                ["uperf_3", "uperf_4"],
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
                "Unsupported Benchmark: hammerDB",
            ),
        ),
    )
    def test_datasets_with_different_benchmark(
        self, user, datasets, exp_status, exp_message, query_get_as, monkeypatch
    ):
        def mock_compare_csv_to_json(
            self, benchmark_name, input_type, data_stream
        ) -> JSON:
            return {"status": "success", "json_data": "quisby_data"}

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(
            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
        )

        response = query_get_as(datasets, user, exp_status)
        if exp_status == HTTPStatus.OK:
            assert response.json["status"] == "success"
            assert response.json["json_data"] == "quisby_data"
        else:
            assert response.json["message"] == exp_message
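
For completeness, the new module can be exercised on its own; a minimal sketch assuming it is run from the repository root with the unit-test dependencies installed:

    import pytest

    # Run only the new compare-API tests; equivalent to invoking
    # "pytest -v lib/pbench/test/unit/server/test_datasets_compare.py" from a shell.
    pytest.main(["-v", "lib/pbench/test/unit/server/test_datasets_compare.py"])
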
1 change: 1 addition & 0 deletions lib/pbench/test/unit/server/test_endpoint_configure.py
@@ -59,6 +59,7 @@ def check_config(self, client, server_config, host, my_headers={}):
"template": f"{uri}/datasets/{{dataset}}",
"params": {"dataset": {"type": "string"}},
},
"datasets_compare": {"template": f"{uri}/compare", "params": {}},
"datasets_contents": {
"template": f"{uri}/datasets/{{dataset}}/contents/{{target}}",
"params": {
