From 520f9ecf48447d5916396decf7d1189796ff62b7 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 6 Apr 2023 09:32:59 -0400 Subject: [PATCH 1/4] Integrate daterange into datasets API PBENCH-1125 Remove the `GET /datasets/daterange` API in favor of a new query parameter, as `GET /datasets?daterange`. This allows getting the actual date range for any filtered selection of datasets. --- docs/API/V1/daterange.md | 58 ------- docs/API/V1/list.md | 17 ++ lib/pbench/client/__init__.py | 1 - lib/pbench/server/api/__init__.py | 7 - .../api/resources/datasets_daterange.py | 68 -------- .../server/api/resources/datasets_list.py | 26 ++- .../unit/server/test_datasets_daterange.py | 150 ------------------ .../test/unit/server/test_datasets_list.py | 68 ++++++++ .../unit/server/test_endpoint_configure.py | 4 - 9 files changed, 110 insertions(+), 289 deletions(-) delete mode 100644 docs/API/V1/daterange.md delete mode 100644 lib/pbench/server/api/resources/datasets_daterange.py delete mode 100644 lib/pbench/test/unit/server/test_datasets_daterange.py diff --git a/docs/API/V1/daterange.md b/docs/API/V1/daterange.md deleted file mode 100644 index 6bd44ec47a..0000000000 --- a/docs/API/V1/daterange.md +++ /dev/null @@ -1,58 +0,0 @@ -# `GET /api/v1/datasets/daterange` - -This API returns the range of creation dates for all datasets accessible to the -authenticated client, optionally filtered by owner and/or access policy. - -For example, this can be used to initialize a date picker. - -## Query parameters - -`access` string \ -Select whether only `private` or only `public` access datasets will be included -in the list. By default, all datasets readable by the authenticated user are -included. For example, without constraints `/datasets/daterange` for an -authenticated user will include all `public` datasets plus all datasets owned -by the authenticated user; specifying `private` will show only the authenticated -user's private datasets, while specifying `public` will show only `public` -datasets (regardless of ownership). - -`owner` string \ -Select only datasets owned by the specified username. Unless the username -matches the authenticated user, only "public" datasets can be selected. - -## Response status - -`200` **OK** \ -Successful request. - -`401` **UNAUTHORIZED** \ -The client did not provide an authentication token but asked to filter datasets -by `owner` or `access=private`. - -`403` **FORBIDDEN** \ -The client asked to filter `access=private` datasets or by `owner` for which -the client does not have READ access. - -`503` **SERVICE UNAVAILABLE** \ -The server has been disabled using the `server-state` server configuration -setting in the [server configuration](./server_config.md) API. The response -body is an `application/json` document describing the current server state, -a message, and optional JSON data provided by the system administrator. - -## Response headers - -`content-type: application/json` \ -The return is a JSON document containing the date range of datasets on the -Pbench Server. - -## Response body - -The `application/json` response body is a JSON object describing the earliest -and most recent dataset upload time on the Pbench Server. 
- -```json -{ - "from": "2023-03-17T03:14:02.013184+00:00", - "to": "2023-04-05T11:29:02.585772+00:00" -} -``` diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index 2aa7290365..35465c07a8 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -22,6 +22,11 @@ authenticated user; specifying `private` will show only the authenticated user's private datasets, while specifying `public` will show only `public` datasets (regardless of ownership). +`daterange` boolean \ +Instead of returning a filtered set of datasets, return only the upload +timestamps of the oldest and most recent datasets in the filtered set. This +can be useful for initializing a date picker, for example. + `end` date/time \ Select only datasets created on or before the specified time. Time should be specified in ISO standard format, as `YYYY-MM-DDThh:mm:ss.ffffff[+|-]HH:MM`. @@ -147,6 +152,18 @@ a message, and optional JSON data provided by the system administrator. ## Response body +### Dataset date range + +The `application/json` response body is a JSON object describing the earliest +and most recent dataset upload time for the selected list of datasets. + +```json +{ + "from": "2023-03-17T03:14:02.013184+00:00", + "to": "2023-04-05T11:29:02.585772+00:00" +} +``` + ### Dataset list The `application/json` response body contains a list of objects which describe diff --git a/lib/pbench/client/__init__.py b/lib/pbench/client/__init__.py index 6537f80cd1..9121aac3f2 100644 --- a/lib/pbench/client/__init__.py +++ b/lib/pbench/client/__init__.py @@ -39,7 +39,6 @@ class API(Enum): DATASETS = "datasets" DATASETS_CONTENTS = "datasets_contents" - DATASETS_DATERANGE = "datasets_daterange" DATASETS_DETAIL = "datasets_detail" DATASETS_INVENTORY = "datasets_inventory" DATASETS_LIST = "datasets_list" diff --git a/lib/pbench/server/api/__init__.py b/lib/pbench/server/api/__init__.py index 12b3c2358c..467fe7c082 100644 --- a/lib/pbench/server/api/__init__.py +++ b/lib/pbench/server/api/__init__.py @@ -13,7 +13,6 @@ from pbench.common.exceptions import ConfigFileNotSpecified from pbench.common.logger import get_pbench_logger from pbench.server import PbenchServerConfig -from pbench.server.api.resources.datasets_daterange import DatasetsDateRange from pbench.server.api.resources.datasets_inventory import DatasetsInventory from pbench.server.api.resources.datasets_list import DatasetsList from pbench.server.api.resources.datasets_metadata import DatasetsMetadata @@ -68,12 +67,6 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig): endpoint="datasets_contents", resource_class_args=(config,), ) - api.add_resource( - DatasetsDateRange, - f"{base_uri}/datasets/daterange", - endpoint="datasets_daterange", - resource_class_args=(config,), - ) api.add_resource( DatasetsDetail, f"{base_uri}/datasets//detail", diff --git a/lib/pbench/server/api/resources/datasets_daterange.py b/lib/pbench/server/api/resources/datasets_daterange.py deleted file mode 100644 index 8944ee86eb..0000000000 --- a/lib/pbench/server/api/resources/datasets_daterange.py +++ /dev/null @@ -1,68 +0,0 @@ -from flask.json import jsonify -from flask.wrappers import Request, Response -from sqlalchemy import func - -from pbench.server import OperationCode, PbenchServerConfig -from pbench.server.api.resources import ( - ApiAuthorizationType, - ApiBase, - ApiContext, - ApiMethod, - ApiParams, - ApiSchema, - Parameter, - ParamType, - Schema, -) -from pbench.server.database.database import Database -from pbench.server.database.models.datasets import 
Dataset - - -class DatasetsDateRange(ApiBase): - """ - API class to retrieve the available date range of accessible datasets. - """ - - def __init__(self, config: PbenchServerConfig): - super().__init__( - config, - ApiSchema( - ApiMethod.GET, - OperationCode.READ, - query_schema=Schema( - Parameter("owner", ParamType.USER, required=False), - Parameter("access", ParamType.ACCESS, required=False), - ), - authorization=ApiAuthorizationType.USER_ACCESS, - ), - ) - - def _get( - self, params: ApiParams, request: Request, context: ApiContext - ) -> Response: - """ - Get the date range for which datasets are available to the client based - on authentication plus optional dataset owner and access criteria. - - Args: - json_data: Ignored because GET has no JSON payload - request: The original Request object containing query parameters - context: API context dictionary - - GET /api/v1/datasets/daterange?owner=user&access=public - """ - - access = params.query.get("access") - owner = params.query.get("owner") - - # Build a SQLAlchemy Query object expressing all of our constraints - query = Database.db_session.query( - func.min(Dataset.uploaded), func.max(Dataset.uploaded) - ) - query = self._build_sql_query(owner, access, query) - - # Execute the query, returning a tuple of the 'min' date and the - # 'max' date. - results = query.first() - - return jsonify({"from": results[0].isoformat(), "to": results[1].isoformat()}) diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index 375ffe5857..86f1a09bd8 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -5,7 +5,7 @@ from flask import current_app from flask.json import jsonify from flask.wrappers import Request, Response -from sqlalchemy import and_, cast, or_, String +from sqlalchemy import and_, cast, func, or_, String from sqlalchemy.exc import ProgrammingError, StatementError from sqlalchemy.orm import aliased, Query from sqlalchemy.sql.expression import Alias @@ -69,6 +69,7 @@ def __init__(self, config: PbenchServerConfig): Parameter("offset", ParamType.INT), Parameter("limit", ParamType.INT), # Output control + Parameter("daterange", ParamType.BOOLEAN), Parameter("keysummary", ParamType.BOOLEAN), Parameter( "metadata", @@ -348,6 +349,27 @@ def keyspace(self, query: Query) -> JSONOBJECT: self.accumulate(aggregate, m.key, m.value) return aggregate + def daterange(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT: + """Return only the date range of the selected datasets. + + Replace the selected "entities" (normally Dataset columns) with the + SQL min and max functions on the dataset upload timestamp so that the + generated SQL query will return a tuple of those two values. 
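+        (Illustratively, the generated statement is roughly "SELECT
+        min(datasets.uploaded), max(datasets.uploaded) FROM datasets
+        WHERE ..." with the same filter constraints as a dataset listing.)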
+ + Args: + request: The HTTP Request object + json: The JSON query parameters + query: The basic filtered SQLAlchemy query object + + Returns: + The date range of the selected datasets + """ + results = query.with_entities( + func.min(Dataset.uploaded), func.max(Dataset.uploaded) + ).first() + + return {"from": results[0].isoformat(), "to": results[1].isoformat()} + def datasets(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT: """Gather and paginate the selected datasets @@ -497,5 +519,7 @@ def _get( query = self._build_sql_query(owner, json.get("access"), query) if json.get("keysummary"): return jsonify(self.keyspace(query)) + elif json.get("daterange"): + return jsonify(self.daterange(request, json, query)) else: return jsonify(self.datasets(request, json, query)) diff --git a/lib/pbench/test/unit/server/test_datasets_daterange.py b/lib/pbench/test/unit/server/test_datasets_daterange.py deleted file mode 100644 index af9ae7faa6..0000000000 --- a/lib/pbench/test/unit/server/test_datasets_daterange.py +++ /dev/null @@ -1,150 +0,0 @@ -import datetime -from http import HTTPStatus -from typing import Dict, List - -import pytest -import requests - -from pbench.server import JSON -from pbench.server.database.models.datasets import Dataset - - -class TestDatasetsDateRange: - """ - Test the `datasets/daterange` API. We perform a variety of queries using a - set of datasets provided by the `attach_dataset` fixture and the - `more_datasets` fixture. - """ - - @pytest.fixture() - def query_as( - self, - client, - server_config, - more_datasets, - provide_metadata, - get_token_func, - add_auth_connection_mock, - ): - """ - Helper fixture to perform the API query and validate an expected - return status. - - Args: - client: Flask test API client fixture - server_config: Pbench config fixture - more_datasets: Dataset construction fixture - provide_metadata: Dataset metadata fixture - get_token_func: Pbench token fixture - """ - - def query_api( - payload: JSON, username: str, expected_status: HTTPStatus - ) -> requests.Response: - token = get_token_func(username) - response = client.get( - f"{server_config.rest_uri}/datasets/daterange", - headers={"authorization": f"bearer {token}"}, - query_string=payload, - ) - assert response.status_code == expected_status - return response - - return query_api - - def get_results(self, name_list: List[str]) -> Dict[str, datetime.datetime]: - """ - Use a list of "expected results" to determine the earliest and the - latest creation date of the set of datasets. 
- - Args: - name_list: List of dataset names - - Returns: - {"from": first_date, "to": last_date} - """ - from_time = datetime.datetime.now(datetime.timezone.utc) - to_time = datetime.datetime( - year=1970, month=1, day=1, tzinfo=datetime.timezone.utc - ) - for name in sorted(name_list): - dataset = Dataset.query(name=name) - to_time = max(dataset.uploaded, to_time) - from_time = min(dataset.uploaded, from_time) - return {"from": from_time.isoformat(), "to": to_time.isoformat()} - - @pytest.mark.parametrize( - "login,query,results", - [ - ("drb", {"owner": "drb"}, ["drb", "fio_1"]), - ("drb", {"access": "public"}, ["fio_1", "fio_2"]), - ("test_admin", {"owner": "drb"}, ["drb", "fio_1"]), - ("drb", {}, ["drb", "fio_1", "fio_2"]), - ( - "test", - {}, - ["test", "fio_1", "fio_2", "uperf_1", "uperf_2", "uperf_3", "uperf_4"], - ), - ( - "test_admin", - {}, - [ - "drb", - "test", - "fio_1", - "fio_2", - "uperf_1", - "uperf_2", - "uperf_3", - "uperf_4", - ], - ), - ], - ) - def test_dataset_daterange(self, query_as, login, query, results): - """ - Test the operation of `datasets/daterange` against our set of test - datasets. - - Args: - query_as: A fixture to provide a helper that executes the API call - login: The username as which to perform a query - query: A JSON representation of the query parameters (these will be - automatically supplemented with a metadata request term) - results: A list of the dataset names we expect to be returned - """ - result = query_as(query, login, HTTPStatus.OK) - assert result.json == self.get_results(results) - - def test_get_unknown_keys(self, query_as): - """ - Test case requesting non-existent query parameter keys. - - Args: - query_as: Query helper fixture - """ - response = query_as( - {"plugh": "xyzzy", "passages": "twisty"}, - "drb", - HTTPStatus.BAD_REQUEST, - ) - assert response.json == {"message": "Unknown URL query keys: passages,plugh"} - - def test_get_repeat_keys(self, query_as): - """ - Test case requesting repeated single-value query param keys. - - NOTE that the request package processes a list of values for a query - parameter by repeating the key name with each value since the HTTP - standard doesn't cover multiple values for a single key; so - "name": ["one", "two"] will appear to the API as "?name=one&name=two". - - Args: - query_as: Query helper fixture - """ - response = query_as( - {"owner": ["one", "two"]}, - "drb", - HTTPStatus.BAD_REQUEST, - ) - assert response.json == {"message": "Repeated URL query key 'owner'"} diff --git a/lib/pbench/test/unit/server/test_datasets_list.py b/lib/pbench/test/unit/server/test_datasets_list.py index e2aa70e45b..90a39e2798 100644 --- a/lib/pbench/test/unit/server/test_datasets_list.py +++ b/lib/pbench/test/unit/server/test_datasets_list.py @@ -698,3 +698,71 @@ def test_key_summary(self, query_as): "origin": None, }, } + + def get_daterange_results( + self, name_list: list[str] + ) -> dict[str, datetime.datetime]: + """ + Use a list of "expected results" to determine the earliest and the + latest creation date of the set of datasets. 
+ + Args: + name_list: List of dataset names + + Returns: + {"from": first_date, "to": last_date} + """ + from_time = datetime.datetime.now(datetime.timezone.utc) + to_time = datetime.datetime( + year=1970, month=1, day=1, tzinfo=datetime.timezone.utc + ) + for name in sorted(name_list): + dataset = Dataset.query(name=name) + to_time = max(dataset.uploaded, to_time) + from_time = min(dataset.uploaded, from_time) + return {"from": from_time.isoformat(), "to": to_time.isoformat()} + + @pytest.mark.parametrize( + "login,query,results", + [ + ("drb", {"owner": "drb"}, ["drb", "fio_1"]), + ("drb", {"mine": "true"}, ["drb", "fio_1"]), + ("drb", {"access": "public"}, ["fio_1", "fio_2"]), + ("test_admin", {"owner": "drb"}, ["drb", "fio_1"]), + ("drb", {}, ["drb", "fio_1", "fio_2"]), + ( + "test", + {}, + ["test", "fio_1", "fio_2", "uperf_1", "uperf_2", "uperf_3", "uperf_4"], + ), + ( + "test_admin", + {}, + [ + "drb", + "test", + "fio_1", + "fio_2", + "uperf_1", + "uperf_2", + "uperf_3", + "uperf_4", + ], + ), + ], + ) + def test_dataset_daterange(self, query_as, login, query, results): + """ + Test the operation of `GET datasets?daterange` against our set of test + datasets. + + Args: + query_as: A fixture to provide a helper that executes the API call + login: The username as which to perform a query + query: A JSON representation of the query parameters (these will be + automatically supplemented with the "daterange" parameter) + results: A list of the dataset names we expect to be returned + """ + query["daterange"] = "true" + result = query_as(query, login, HTTPStatus.OK) + assert result.json == self.get_daterange_results(results) diff --git a/lib/pbench/test/unit/server/test_endpoint_configure.py b/lib/pbench/test/unit/server/test_endpoint_configure.py index 9908c2b258..046d720fcf 100644 --- a/lib/pbench/test/unit/server/test_endpoint_configure.py +++ b/lib/pbench/test/unit/server/test_endpoint_configure.py @@ -53,10 +53,6 @@ def check_config(self, client, server_config, host, my_headers={}): "target": {"type": "path"}, }, }, - "datasets_daterange": { - "template": f"{uri}/datasets/daterange", - "params": {}, - }, "datasets_detail": { "template": f"{uri}/datasets/{{dataset}}/detail", "params": {"dataset": {"type": "string"}}, From 4e79e67fcc9cf4386d693aff1301c67a7f8d182a Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 6 Apr 2023 16:33:59 -0400 Subject: [PATCH 2/4] Allow both summary modes --- docs/API/V1/list.md | 193 ++++++++++-------- .../server/api/resources/datasets_list.py | 32 ++- .../test/unit/server/test_datasets_list.py | 108 +++++++--- 3 files changed, 203 insertions(+), 130 deletions(-) diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index 35465c07a8..b25ad89885 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -11,6 +11,11 @@ and arbitrary metadata filter expressions. Large collections can be paginated for efficiency using the `limit` and `offset` query parameters. +The `keysummary` and `daterange` query parameters (if `true`) select "summary" +modes where aggregate metadata is returned without a list of datasets. These two +may be used together, but cannot be used along with the normal collection list +mode as they aren't subject to pagination. + ## Query parameters `access` string \ @@ -65,6 +70,15 @@ case, a boolean, which is represented in JSON as `true` or `false`). Beware especially when attempting to match a JSON document (such as `dataset.metalog.pbench`). 
 
+`keysummary` boolean \
+Instead of displaying a list of selected datasets and metadata, use the set of
+specified filters to accumulate a nested report on the metadata key namespace
+for the set of datasets. See [metadata](../metadata.md) for details on the
+Pbench Server metadata namespaces. Because the `global` and `user` namespaces
+are completely dynamic, and the `dataset.metalog` sub-namespace varies greatly
+across Pbench Agent benchmark scripts, this mode provides a mechanism for a
+metadata visualizer to understand what's available for a set of datasets.
+
 `limit` integer \
 "Paginate" the selected datasets by returning at most `limit` datasets. This
 can be used in conjunction with `offset` to progress through the full list in
@@ -102,15 +116,6 @@ If the timezone offset is omitted it will be assumed to be UTC (`+00:00`); if
 the time is omitted it will be assumed as midnight (`00:00:00`) on the
 specified date.
 
-`keysummary` boolean \
-Instead of displaying a list of selected datasets and metadata, use the set of
-specified filters to accumulate a nested report on the metadata key namespace
-for the set of datasets. See [metadata](../metadata.md) for deails on the
-Pbench Server metadata namespaces. Because the `global` and `user` namespaces
-are completely dynamic, and the `dataset.metalog` sub-namespace varies greatly
-across Pbench Agent benchmark scripts, this mode provides a mechanism for a
-metadata visualizer to understand what's available for a set of datasets.
-
 ## Request headers
 
 `authorization: bearer` token [_optional_] \
@@ -155,7 +160,9 @@ a message, and optional JSON data provided by the system administrator.
 
 ### Dataset date range
 
 The `application/json` response body is a JSON object describing the earliest
-and most recent dataset upload time for the selected list of datasets.
+and most recent dataset upload time for the selected list of datasets. If the
+collection filters exclude all datasets (the result set is empty), the return
+value will be empty, omitting both the `from` and `to` keys.
 
 ```json
 {
@@ -234,9 +241,9 @@ might return:
 
 When the `keysummary` query parameter is `true` (e.g., either `?keysummary` or
 `?keysummary=true`), instead of reporting a list of datasets and metadata for
-each dataset, report a hierarchical representation of the aggregate metadata
-namespace across all selected datasets. This returns much less data and is not
-subject to pagination.
+each dataset, the `application/json` response body contains a hierarchical
+representation of the aggregate metadata namespace across all selected datasets.
+This returns much less data and is not subject to pagination. 
"Leaf" nodes in the metadata tree are represented by `null` values while any key with children will be represented as a nested JSON object showing those @@ -252,83 +259,93 @@ will return a JSON document with the keys `config`, `date`, `hostname_f`, ```json { - "dataset": { - "access": null, - "id": null, - "metalog": { - "controller": { - "hostname": null, - "hostname-alias": null, - "hostname-all-fqdns": null, - "hostname-all-ip-addresses": null, - "hostname-domain": null, - "hostname-fqdn": null, - "hostname-ip-address": null, - "hostname-nis": null, - "hostname-short": null, - "ssh_opts": null - }, - "iterations/1-default": { - "iteration_name": null, - "iteration_number": null, - "user_script": null - }, - "pbench": { - "config": null, - "date": null, - "hostname_f": null, - "hostname_ip": null, - "hostname_s": null, - "iterations": null, - "name": null, - "rpm-version": null, - "script": null, - "tar-ball-creation-timestamp": null - }, - "run": { - "controller": null, - "end_run": null, - "raw_size": null, - "start_run": null + "keys": { + "dataset": { + "access": null, + "id": null, + "metalog": { + "controller": { + "hostname": null, + "hostname-alias": null, + "hostname-all-fqdns": null, + "hostname-all-ip-addresses": null, + "hostname-domain": null, + "hostname-fqdn": null, + "hostname-ip-address": null, + "hostname-nis": null, + "hostname-short": null, + "ssh_opts": null + }, + "iterations/1-default": { + "iteration_name": null, + "iteration_number": null, + "user_script": null + }, + "pbench": { + "config": null, + "date": null, + "hostname_f": null, + "hostname_ip": null, + "hostname_s": null, + "iterations": null, + "name": null, + "rpm-version": null, + "script": null, + "tar-ball-creation-timestamp": null + }, + "run": { + "controller": null, + "end_run": null, + "raw_size": null, + "start_run": null + }, + "tools": { + "group": null, + "hosts": null, + "trigger": null + }, + "tools/dbutenho.bos.csb": { + "hostname-alias": null, + "hostname-all-fqdns": null, + "hostname-all-ip-addresses": null, + "hostname-domain": null, + "hostname-fqdn": null, + "hostname-ip-address": null, + "hostname-nis": null, + "hostname-short": null, + "label": null, + "rpm-version": null, + "tools": null, + "vmstat": null + }, + "tools/dbutenho.bos.csb/vmstat": { + "install_check_output": null, + "install_check_status_code": null, + "options": null + } }, - "tools": { - "group": null, - "hosts": null, - "trigger": null - }, - "tools/dbutenho.bos.csb": { - "hostname-alias": null, - "hostname-all-fqdns": null, - "hostname-all-ip-addresses": null, - "hostname-domain": null, - "hostname-fqdn": null, - "hostname-ip-address": null, - "hostname-nis": null, - "hostname-short": null, - "label": null, - "rpm-version": null, - "tools": null, - "vmstat": null - }, - "tools/dbutenho.bos.csb/vmstat": { - "install_check_output": null, - "install_check_status_code": null, - "options": null - } + "name": null, + "owner_id": null, + "resource_id": null, + "uploaded": null }, - "name": null, - "owner_id": null, - "resource_id": null, - "uploaded": null - }, - "server": { - "deletion": null, - "index-map": { - "container-pbench.v6.run-data.2023-03": null, - "container-pbench.v6.run-toc.2023-03": null - }, - "origin": null, - "tarball-path": null + "server": { + "deletion": null, + "index-map": { + "container-pbench.v6.run-data.2023-03": null, + "container-pbench.v6.run-toc.2023-03": null + }, + "origin": null, + "tarball-path": null + } } } ``` + +### Combining key namespace summary and date range + +When both the 
`keysummary` and `daterange` query parameters are `true`, the +`application/json` response body contains the `from`, `to`, and `keys` key +values. If the selected collection filters produce no results, as with +`daterange` alone, the `from` and `to` keys will be omitted and the value of +`keys` will be an empty object. diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index 86f1a09bd8..4123c8a528 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -347,9 +347,9 @@ def keyspace(self, query: Query) -> JSONOBJECT: self.accumulate(aggregate["dataset"], m.key, m.value) else: self.accumulate(aggregate, m.key, m.value) - return aggregate + return {"keys": aggregate} - def daterange(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT: + def daterange(self, query: Query) -> JSONOBJECT: """Return only the date range of the selected datasets. Replace the selected "entities" (normally Dataset columns) with the @@ -357,8 +357,6 @@ def daterange(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJ generated SQL query will return a tuple of those two values. Args: - request: The HTTP Request object - json: The JSON query parameters query: The basic filtered SQLAlchemy query object Returns: @@ -368,7 +366,10 @@ def daterange(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJ func.min(Dataset.uploaded), func.max(Dataset.uploaded) ).first() - return {"from": results[0].isoformat(), "to": results[1].isoformat()} + if results and results[0] and results[1]: + return {"from": results[0].isoformat(), "to": results[1].isoformat()} + else: + return {} def datasets(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT: """Gather and paginate the selected datasets @@ -517,9 +518,20 @@ def _get( else: owner = json.get("owner") query = self._build_sql_query(owner, json.get("access"), query) + result = {} + done = False + + # We can do "keysummary" and "daterange", but, as it makes no real + # sense to paginate either, we don't support them in combination with + # a normal list query. So we will perform either/or keysummary and + # daterange, and acquire a normal list of datasets only if neither was + # specified. 
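+        # (When both are requested, the merged result is a single JSON object
+        # combining their keys, e.g. {"keys": {...}, "from": ..., "to": ...};
+        # an empty selection yields {"keys": {}} with "from"/"to" omitted.)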
if json.get("keysummary"): - return jsonify(self.keyspace(query)) - elif json.get("daterange"): - return jsonify(self.daterange(request, json, query)) - else: - return jsonify(self.datasets(request, json, query)) + result.update(self.keyspace(query)) + done = True + if json.get("daterange"): + result.update(self.daterange(query)) + done = True + if not done: + result = self.datasets(request, json, query) + return jsonify(result) diff --git a/lib/pbench/test/unit/server/test_datasets_list.py b/lib/pbench/test/unit/server/test_datasets_list.py index 90a39e2798..5bd460c70e 100644 --- a/lib/pbench/test/unit/server/test_datasets_list.py +++ b/lib/pbench/test/unit/server/test_datasets_list.py @@ -670,33 +670,35 @@ def test_key_summary(self, query_as): Metadata.setvalue(dataset=fio_1, key="global.legacy.server", value="ABC") response = query_as({"keysummary": "true"}, "drb", HTTPStatus.OK) assert response.json == { - "dataset": { - "access": None, - "id": None, - "metalog": { - "pbench": { - "config": None, - "date": None, - "name": None, - "script": None, + "keys": { + "dataset": { + "access": None, + "id": None, + "metalog": { + "pbench": { + "config": None, + "date": None, + "name": None, + "script": None, + }, + "run": {"controller": None}, }, - "run": {"controller": None}, + "name": None, + "owner_id": None, + "resource_id": None, + "uploaded": None, }, - "name": None, - "owner_id": None, - "resource_id": None, - "uploaded": None, - }, - "global": {"contact": None, "legacy": {"server": None}}, - "server": { - "deletion": None, - "index-map": { - "unit-test.v5.result-data-sample.2020-08": None, - "unit-test.v6.run-data.2020-08": None, - "unit-test.v6.run-toc.2020-05": None, + "global": {"contact": None, "legacy": {"server": None}}, + "server": { + "deletion": None, + "index-map": { + "unit-test.v5.result-data-sample.2020-08": None, + "unit-test.v6.run-data.2020-08": None, + "unit-test.v6.run-toc.2020-05": None, + }, + "origin": None, }, - "origin": None, - }, + } } def get_daterange_results( @@ -711,15 +713,15 @@ def get_daterange_results( Returns: {"from": first_date, "to": last_date} + + or + + {} if the list is empty """ - from_time = datetime.datetime.now(datetime.timezone.utc) - to_time = datetime.datetime( - year=1970, month=1, day=1, tzinfo=datetime.timezone.utc - ) - for name in sorted(name_list): - dataset = Dataset.query(name=name) - to_time = max(dataset.uploaded, to_time) - from_time = min(dataset.uploaded, from_time) + if not name_list: + return {} + to_time = max(Dataset.query(name=n).uploaded for n in name_list) + from_time = min(Dataset.query(name=n).uploaded for n in name_list) return {"from": from_time.isoformat(), "to": to_time.isoformat()} @pytest.mark.parametrize( @@ -728,6 +730,7 @@ def get_daterange_results( ("drb", {"owner": "drb"}, ["drb", "fio_1"]), ("drb", {"mine": "true"}, ["drb", "fio_1"]), ("drb", {"access": "public"}, ["fio_1", "fio_2"]), + ("drb", {"name": "noname"}, []), ("test_admin", {"owner": "drb"}, ["drb", "fio_1"]), ("drb", {}, ["drb", "fio_1", "fio_2"]), ( @@ -766,3 +769,44 @@ def test_dataset_daterange(self, query_as, login, query, results): query["daterange"] = "true" result = query_as(query, login, HTTPStatus.OK) assert result.json == self.get_daterange_results(results) + + def test_key_and_dates(self, query_as): + """Test keyspace summary in combination with date range + + This tests that we can use "keysummary" and "daterange" together. 
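+
+        The expected response is a single JSON object merging the "from"/"to"
+        upload range with the "keys" namespace summary.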
+ """ + response = query_as( + {"keysummary": "true", "daterange": "true"}, "drb", HTTPStatus.OK + ) + assert response.json == { + "from": "1978-06-26T08:00:00+00:00", + "to": "2022-01-01T00:00:00+00:00", + "keys": { + "dataset": { + "access": None, + "id": None, + "metalog": { + "pbench": { + "config": None, + "date": None, + "name": None, + "script": None, + }, + "run": {"controller": None}, + }, + "name": None, + "owner_id": None, + "resource_id": None, + "uploaded": None, + }, + "global": {"contact": None}, + "server": { + "deletion": None, + "index-map": { + "unit-test.v5.result-data-sample.2020-08": None, + "unit-test.v6.run-data.2020-08": None, + "unit-test.v6.run-toc.2020-05": None, + }, + }, + }, + } From fb5034a66b9acfc0bb3ec014e4a76111c15eff4e Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 7 Apr 2023 09:35:51 -0400 Subject: [PATCH 3/4] A few more tweaks --- docs/API/V1/list.md | 8 ++++++-- lib/pbench/server/api/resources/datasets_list.py | 9 +++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index b25ad89885..48d9d5875f 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -30,7 +30,9 @@ private datasets, while specifying `public` will show only `public` datasets `daterange` boolean \ Instead of returning a filtered set of datasets, return only the upload timestamps of the oldest and most recent datasets in the filtered set. This -can be useful for initializing a date picker, for example. +may be useful for initializing a date picker. If no datasets are selected by +the specified filters, the `from` and `to` keys (see +[results](#dataset-date-range)) will not be returned. `end` date/time \ Select only datasets created on or before the specified time. Time should be @@ -77,7 +79,9 @@ for the set of datasets. See [metadata](../metadata.md) for deails on the Pbench Server metadata namespaces. Because the `global` and `user` namespaces are completely dynamic, and the `dataset.metalog` sub-namespace varies greatly across Pbench Agent benchmark scripts, this mode provides a mechanism for a -metadata visualizer to understand what's available for a set of datasets. +metadata visualizer to understand what's available for a set of datasets. If no +datasets are selected by the specified filters, the `keys` key (see +[results](#key-namespace-summary) will be set to an empty object. `limit` integer \ "Paginate" the selected datasets by returning at most `limit` datasets. This diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index 4123c8a528..410d9d0c0d 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -332,14 +332,15 @@ def keyspace(self, query: Query) -> JSONOBJECT: Returns: The aggregated keyspace JSON object """ - aggregate: JSONOBJECT = { - "dataset": {c.name: None for c in Dataset.__table__._columns} - } - Database.dump_query(query, current_app.logger) + aggregate: JSONOBJECT = {} datasets = query.all() for d in datasets: + if not aggregate: + aggregate.update( + {"dataset": {c.name: None for c in Dataset.__table__._columns}} + ) for m in d.metadatas: # "metalog" is a top-level key in the Metadata schema, but we # report it as a sub-key of "dataset". 
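As a minimal, self-contained sketch of the empty-selection behavior the patches
above guard against (an editorial illustration using a stand-in model and an
in-memory SQLite database, not the Pbench code or test fixtures): SQL `min()`
and `max()` over an empty set return a single all-NULL row, so the date-range
tuple is `(None, None)` and must be checked before calling `isoformat()`.

```python
from sqlalchemy import Column, DateTime, Integer, create_engine, func
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()


class Dataset(Base):
    """Stand-in for the real Pbench Dataset model (illustration only)."""

    __tablename__ = "datasets"
    id = Column(Integer, primary_key=True)
    uploaded = Column(DateTime)


engine = create_engine("sqlite://")  # in-memory database, starts empty
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

# Even with no rows selected, the aggregate query returns one row whose
# min/max values are NULL -- hence daterange()'s guard before isoformat().
row = (
    session.query(Dataset)
    .with_entities(func.min(Dataset.uploaded), func.max(Dataset.uploaded))
    .first()
)
assert tuple(row) == (None, None)
```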
From 510e26404ebb61dbcf2857cfdf4d517b3a64f759 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 7 Apr 2023 16:10:32 -0400 Subject: [PATCH 4/4] Typo --- docs/API/V1/list.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index 48d9d5875f..236fe3fdc5 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -81,7 +81,7 @@ are completely dynamic, and the `dataset.metalog` sub-namespace varies greatly across Pbench Agent benchmark scripts, this mode provides a mechanism for a metadata visualizer to understand what's available for a set of datasets. If no datasets are selected by the specified filters, the `keys` key (see -[results](#key-namespace-summary) will be set to an empty object. +[results](#key-namespace-summary)) will be set to an empty object. `limit` integer \ "Paginate" the selected datasets by returning at most `limit` datasets. This
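To round out the series, a hedged client-side sketch of the consolidated API
(the server URL and token below are placeholders; the endpoint and query
parameters are those documented in `docs/API/V1/list.md` above):

```python
import requests

SERVER = "https://pbench.example.com"  # placeholder deployment
TOKEN = "..."  # placeholder Pbench API token

# A single call may request both summary modes; neither is paginated.
response = requests.get(
    f"{SERVER}/api/v1/datasets",
    headers={"authorization": f"bearer {TOKEN}"},
    params={"daterange": "true", "keysummary": "true", "access": "public"},
)
response.raise_for_status()
summary = response.json()

# Non-empty selection: {"from": ..., "to": ..., "keys": {...}}.
# Empty selection: {"keys": {}} with "from"/"to" omitted.
print(summary.get("from"), summary.get("to"))
print(sorted(summary["keys"]))
```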