Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PBENCH-1127 Implementation of Quisby API #3463

Merged
merged 15 commits into from
Jun 21, 2023
Merged
1 change: 1 addition & 0 deletions lib/pbench/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class API(Enum):
DATASETS_NAMESPACE = "datasets_namespace"
DATASETS_SEARCH = "datasets_search"
DATASETS_VALUES = "datasets_values"
DATASETS_VISUALIZE = "datasets_visualize"
ENDPOINTS = "endpoints"
KEY = "key"
RELAY = "relay"
Expand Down
7 changes: 7 additions & 0 deletions lib/pbench/server/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pbench.server.api.resources.datasets_inventory import DatasetsInventory
from pbench.server.api.resources.datasets_list import DatasetsList
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
from pbench.server.api.resources.datasets_visualize import DatasetsVisualize
from pbench.server.api.resources.endpoint_configure import EndpointConfig
from pbench.server.api.resources.query_apis.dataset import Datasets
from pbench.server.api.resources.query_apis.datasets.datasets_contents import (
Expand Down Expand Up @@ -119,6 +120,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig):
endpoint="datasets_search",
resource_class_args=(config,),
)
api.add_resource(
DatasetsVisualize,
f"{base_uri}/datasets/<string:dataset>/visualize",
endpoint="datasets_visualize",
resource_class_args=(config,),
)
api.add_resource(
EndpointConfig,
f"{base_uri}/endpoints",
Expand Down
99 changes: 99 additions & 0 deletions lib/pbench/server/api/resources/datasets_visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from http import HTTPStatus
from urllib.request import Request

from flask import current_app, jsonify
from flask.wrappers import Response
from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
APIAbort,
ApiAuthorizationType,
ApiBase,
ApiContext,
APIInternalError,
ApiMethod,
ApiParams,
ApiSchema,
Parameter,
ParamType,
Schema,
)
from pbench.server.cache_manager import (
CacheManager,
TarballNotFound,
TarballUnpackError,
)
from pbench.server.database import Dataset


class DatasetsVisualize(ApiBase):
    """
    This class implements the Server API used to retrieve data for
    visualization.

    The dataset's "result.csv" is extracted from its tarball and handed to
    Quisby; the dictionary Quisby returns is sent back to the caller as JSON.
    """

    def __init__(self, config: PbenchServerConfig):
        """
        Declare a single GET operation (operation code READ) whose URI
        carries a required "dataset" parameter and which uses DATASET
        authorization.

        Args:
            config: the Pbench server configuration object
        """
        super().__init__(
            config,
            ApiSchema(
                ApiMethod.GET,
                OperationCode.READ,
                uri_schema=Schema(
                    Parameter("dataset", ParamType.DATASET, required=True),
                ),
                authorization=ApiAuthorizationType.DATASET,
            ),
        )

    def _get(
        self, params: ApiParams, request: Request, context: ApiContext
    ) -> Response:
        """
        Use Quisby to process a dataset's results into a form that supports
        visualization.

        Args:
            params: includes the uri parameters, which provide the dataset.
            request: Original incoming Request object
            context: API context dictionary

        Returns:
            A JSON response containing the dictionary returned by Quisby.

        Raises:
            APIAbort, reporting "NOT_FOUND" when the cache manager has no
                tarball for the dataset, or "UNSUPPORTED_MEDIA_TYPE" when
                the dataset's benchmark script is missing or is not a
                member of Quisby's BenchmarkName.
            APIInternalError, when "result.csv" cannot be extracted from the
                tarball or when Quisby does not report "success".

        GET /api/v1/datasets/{dataset}/visualize
        """

        dataset = params.uri["dataset"]
        cache_m = CacheManager(self.config, current_app.logger)

        try:
            tarball = cache_m.find_dataset(dataset.resource_id)
        except TarballNotFound as e:
            raise APIAbort(
                HTTPStatus.NOT_FOUND, f"No dataset with ID '{e.tarball}' found"
            ) from e

        script_key = "dataset.metalog.pbench.script"
        metadata = self._get_dataset_metadata(dataset, [script_key])

        # A dataset without a usable script value must produce the same clean
        # "unsupported" response as an unknown benchmark, rather than failing
        # on `.upper()` or on the key lookup.
        # NOTE(review): assumes the metadata helper returns a dict-like
        # object -- confirm against ApiBase._get_dataset_metadata.
        script = metadata.get(script_key)
        benchmark = script.upper() if isinstance(script, str) else None

        # Compare against None explicitly: the only failure we want to detect
        # here is "no such member", not a member that happens to be falsy.
        benchmark_type = BenchmarkName.__members__.get(benchmark)
        if benchmark_type is None:
            raise APIAbort(
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE, f"Unsupported Benchmark: {benchmark}"
            )

        name = Dataset.stem(tarball.tarball_path)
        try:
            file = tarball.extract(tarball.tarball_path, f"{name}/result.csv")
        except TarballUnpackError as e:
            raise APIInternalError(str(e)) from e

        get_quisby_data = QuisbyProcessing().extract_data(
            benchmark_type, dataset.name, InputType.STREAM, file
        )

        if get_quisby_data["status"] != "success":
            raise APIInternalError(
                f"Quisby processing failure. Exception: {get_quisby_data['exception']}"
            )
        return jsonify(get_quisby_data)
4 changes: 2 additions & 2 deletions lib/pbench/server/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self, tarball: str):
self.tarball = tarball

def __str__(self) -> str:
return f"The dataset tarball named {self.tarball!r} is not present in the cache manager"
return f"The dataset tarball named {self.tarball!r} is not found"


class DuplicateTarball(CacheManagerError):
Expand All @@ -59,7 +59,7 @@ def __init__(self, tarball: str):
self.tarball = tarball

def __str__(self) -> str:
return f"A dataset tarball named {self.tarball!r} is already present in the cache manager"
return f"A dataset tarball named {self.tarball!r} is already present"


class MetadataError(CacheManagerError):
Expand Down
12 changes: 3 additions & 9 deletions lib/pbench/test/unit/server/test_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_create_bad(
cm.create(tarball.tarball_path)
assert (
str(exc.value)
== "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present in the cache manager"
== "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present"
)
assert tarball.metadata == fake_get_metadata(tarball.tarball_path)
assert exc.value.tarball == tarball.name
Expand Down Expand Up @@ -924,10 +924,7 @@ def mock_run(args, **kwargs):
assert tarball == cm[md5]
with pytest.raises(TarballNotFound) as exc:
cm["foobar"]
assert (
str(exc.value)
== "The dataset tarball named 'foobar' is not present in the cache manager"
)
assert str(exc.value) == "The dataset tarball named 'foobar' is not found"

# Test __contains__
assert md5 in cm
Expand All @@ -946,10 +943,7 @@ def mock_run(args, **kwargs):
# Try to find a dataset that doesn't exist
with pytest.raises(TarballNotFound) as exc:
cm.find_dataset("foobar")
assert (
str(exc.value)
== "The dataset tarball named 'foobar' is not present in the cache manager"
)
assert str(exc.value) == "The dataset tarball named 'foobar' is not found"
assert exc.value.tarball == "foobar"

# Unpack the dataset, creating INCOMING and RESULTS links
Expand Down
2 changes: 1 addition & 1 deletion lib/pbench/test/unit/server/test_datasets_inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_get_no_dataset(self, query_get_as):
def test_dataset_not_present(self, query_get_as):
response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND)
assert response.json == {
"message": "The dataset tarball named 'random_md5_string4' is not present in the cache manager"
"message": "The dataset tarball named 'random_md5_string4' is not found"
}

def test_unauthorized_access(self, query_get_as):
Expand Down
127 changes: 127 additions & 0 deletions lib/pbench/test/unit/server/test_datasets_visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from http import HTTPStatus
from pathlib import Path

from pquisby.lib.post_processing import QuisbyProcessing
import pytest
import requests

from pbench.server import JSON
from pbench.server.api.resources import ApiBase
from pbench.server.cache_manager import CacheManager, Tarball
from pbench.server.database.models.datasets import Dataset, DatasetNotFound


class TestVisualize:
    """Unit tests for the `datasets/{dataset}/visualize` API endpoint."""

    @pytest.fixture()
    def query_get_as(self, client, server_config, more_datasets, get_token_func):
        """
        Helper fixture to perform the API query and validate an expected
        return status.

        Args:
            client: Flask test API client fixture
            server_config: Pbench config fixture
            more_datasets: Dataset construction fixture
            get_token_func: Pbench token fixture
        """

        def query_api(
            dataset: str, user, expected_status: HTTPStatus
        ) -> requests.Response:
            try:
                dataset_id = Dataset.query(name=dataset).resource_id
            except DatasetNotFound:
                dataset_id = dataset  # Allow passing deliberately bad value
            headers = {"authorization": f"bearer {get_token_func(user)}"}
            response = client.get(
                f"{server_config.rest_uri}/datasets/{dataset_id}/visualize",
                headers=headers,
            )
            assert response.status_code == expected_status
            return response

        return query_api

    def mock_find_dataset(self, _dataset: str) -> Tarball:
        """Stand-in for CacheManager.find_dataset returning a fake tarball."""

        class Tarball(object):
            tarball_path = Path("/dataset/tarball.tar.xz")

            # The class itself (not an instance) is returned below, so
            # `extract` is called as a plain two-argument function and
            # deliberately takes no `self`.
            def extract(_tarball_path: Path, _path: str) -> str:
                return "CSV_file_as_a_byte_stream"

        return Tarball

    def mock_get_dataset_metadata(self, _dataset, _key) -> JSON:
        """Stand-in for ApiBase._get_dataset_metadata reporting "uperf"."""
        return {"dataset.metalog.pbench.script": "uperf"}

    def test_get_no_dataset(self, query_get_as):
        """A dataset name that is not known yields NOT_FOUND."""
        response = query_get_as("nonexistent-dataset", "drb", HTTPStatus.NOT_FOUND)
        assert response.json == {"message": "Dataset 'nonexistent-dataset' not found"}

    def test_dataset_not_present(self, query_get_as):
        """
        With the cache manager left unmocked, querying "fio_2" yields the
        endpoint's own NOT_FOUND message.
        """
        response = query_get_as("fio_2", "drb", HTTPStatus.NOT_FOUND)
        assert response.json == {
            "message": "No dataset with ID 'random_md5_string4' found"
        }

    def test_unauthorized_access(self, query_get_as):
        """User "drb" is refused READ access to the "test" dataset."""
        response = query_get_as("test", "drb", HTTPStatus.FORBIDDEN)
        assert response.json == {
            "message": "User drb is not authorized to READ a resource owned by test with private access"
        }

    def test_successful_get(self, query_get_as, monkeypatch):
        """A "success" result from Quisby is returned to the caller as JSON."""

        def mock_extract_data(self, test_name, dataset_name, input_type, data) -> JSON:
            return {"status": "success", "json_data": "quisby_data"}

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(
            ApiBase, "_get_dataset_metadata", self.mock_get_dataset_metadata
        )
        monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)

        response = query_get_as("uperf_1", "test", HTTPStatus.OK)
        assert response.json["status"] == "success"
        assert response.json["json_data"] == "quisby_data"

    def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
        """A "failed" result from Quisby is reported as an internal error."""

        def mock_find_dataset_with_incorrect_data(self, dataset) -> Tarball:
            class Tarball(object):
                tarball_path = Path("/dataset/tarball.tar.xz")

                # Called on the class, not an instance: no `self` parameter.
                def extract(tarball_path, path) -> str:
                    return "IncorrectData"

            return Tarball

        def mock_extract_data(self, test_name, dataset_name, input_type, data) -> JSON:
            return {"status": "failed", "exception": "Unsupported Media Type"}

        monkeypatch.setattr(
            CacheManager, "find_dataset", mock_find_dataset_with_incorrect_data
        )
        monkeypatch.setattr(
            ApiBase, "_get_dataset_metadata", self.mock_get_dataset_metadata
        )
        monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
        response = query_get_as("uperf_1", "test", HTTPStatus.INTERNAL_SERVER_ERROR)
        assert response.json["message"].startswith(
            "Internal Pbench Server Error: log reference "
        )

    def test_unsupported_benchmark(self, query_get_as, monkeypatch):
        """
        An unrecognized benchmark script is rejected with
        UNSUPPORTED_MEDIA_TYPE before Quisby is ever invoked.
        """
        # Cleared by the Quisby mock; it must still be True at the end,
        # proving that extract_data was never called.
        flag = True

        def mock_extract_data(*args, **kwargs) -> JSON:
            nonlocal flag
            flag = False

        def mock_get_metadata(self, dataset, key) -> JSON:
            return {"dataset.metalog.pbench.script": "hammerDB"}

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(ApiBase, "_get_dataset_metadata", mock_get_metadata)
        monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
        response = query_get_as("uperf_1", "test", HTTPStatus.UNSUPPORTED_MEDIA_TYPE)
        assert response.json["message"] == "Unsupported Benchmark: HAMMERDB"
        assert flag is True
4 changes: 4 additions & 0 deletions lib/pbench/test/unit/server/test_endpoint_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def check_config(self, client, server_config, host, my_headers={}):
"dataset_view": {"type": "string"},
},
},
"datasets_visualize": {
"template": f"{uri}/datasets/{{dataset}}/visualize",
"params": {"dataset": {"type": "string"}},
},
"endpoints": {"template": f"{uri}/endpoints", "params": {}},
"key": {
"template": f"{uri}/key/{{key}}",
Expand Down
2 changes: 1 addition & 1 deletion server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ python-dateutil
requests # TODO CVE-2023-32681 (>=2.31.0)
sdnotify
sqlalchemy>=1.4.23
sqlalchemy_utils>=0.37.6
sqlalchemy_utils>=0.37.6