Skip to content

Commit

Permalink
Return "raw" API parameters in pagination (#3412)
Browse files Browse the repository at this point in the history
* Return "raw" API parameters in pagination

PBENCH-1133

The `GET /datasets` response is optimized for sequential pagination, providing
a convenient "next_url" string that can be used directly. However, if a client
wants to support "random access" pagination, it must parse the URL string in
order to modify the `offset` parameter.

This attempts to make that a bit easier by supplementing the current response
payload with a `parameters` field containing the query parameters JSON object,
making it easy to update the `offset` parameter.

(Making the unit tests work against the normalized parameter list proved a bit
challenging, so I ended up saving the original "raw" client parameters in the
API `context` so they can be used directly.)
  • Loading branch information
dbutenhof authored May 3, 2023
1 parent 245bb0d commit 0a5a032
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 27 deletions.
13 changes: 7 additions & 6 deletions lib/pbench/server/api/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1700,11 +1700,8 @@ def _dispatch(
try:
if schema.query_schema:
query_params = self._gather_query_params(request, schema.query_schema)

params = self.schemas.validate(
method,
ApiParams(body=body_params, query=query_params, uri=uri_params),
)
raw_params = ApiParams(body=body_params, query=query_params, uri=uri_params)
params = self.schemas.validate(method, raw_params)
except APIInternalError as e:
current_app.logger.exception("{} {}", api_name, e.details)
abort(e.http_status, message=str(e))
Expand Down Expand Up @@ -1772,7 +1769,11 @@ def _dispatch(
"attributes": None,
}

context = {"auditing": auditing, "attributes": schema.attributes}
context = {
"auditing": auditing,
"attributes": schema.attributes,
"raw_params": raw_params,
}
try:
response = execute(params, request, context)
except APIInternalError as e:
Expand Down
34 changes: 25 additions & 9 deletions lib/pbench/server/api/resources/datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def __init__(self, config: PbenchServerConfig):
)

def get_paginated_obj(
self, query: Query, json: JSON, url: str
self, query: Query, json: JSON, raw_params: ApiParams, url: str
) -> tuple[list[JSONOBJECT], dict[str, str]]:
"""Helper function to return a slice of datasets (constructed according
to the user specified limit and an offset number) and a paginated object
Expand All @@ -309,10 +309,15 @@ def get_paginated_obj(
"limit": 10 -> dataset[0: 10]
"offset": 20 -> dataset[20: total_items_count]
TODO: We may need to optimize the pagination
e.g Use of unique pointers to record the last returned row and then
use this pointer in subsequent page request instead of an initial
start to narrow down the result.
Args:
query: A SQLAlchemy query object
json: The query parameters in normalized JSON form
raw_params: The original API parameters for reference
url: The API URL
Returns:
The list of Dataset objects matched by the query and a pagination
framework object.
"""
paginated_result = {}
query = query.distinct()
Expand All @@ -333,14 +338,19 @@ def get_paginated_obj(
Database.dump_query(query, current_app.logger)

items = query.all()
raw = raw_params.query.copy()
next_offset = offset + len(items)
if next_offset < total_count:
json["offset"] = next_offset
json["offset"] = str(next_offset)
raw["offset"] = str(next_offset)
parsed_url = urlparse(url)
next_url = parsed_url._replace(query=urlencode_json(json)).geturl()
else:
if limit:
raw["offset"] = str(total_count)
next_url = ""

paginated_result["parameters"] = raw
paginated_result["next_url"] = next_url
paginated_result["total"] = total_count
return items, paginated_result
Expand Down Expand Up @@ -600,7 +610,12 @@ def daterange(self, query: Query) -> JSONOBJECT:
return {}

def datasets(
self, request: Request, aliases: dict[str, Any], json: JSONOBJECT, query: Query
self,
request: Request,
aliases: dict[str, Any],
json: JSONOBJECT,
raw_params: ApiParams,
query: Query,
) -> JSONOBJECT:
"""Gather and paginate the selected datasets
Expand All @@ -611,6 +626,7 @@ def datasets(
request: The HTTP Request object
aliases: Map of join column aliases for each Metadata namespace
json: The JSON query parameters
raw_params: The original API parameters (used for pagination)
query: The basic filtered SQLAlchemy query object
Returns:
Expand Down Expand Up @@ -666,7 +682,7 @@ def datasets(

try:
datasets, paginated_result = self.get_paginated_obj(
query=query, json=json, url=request.url
query=query, json=json, raw_params=raw_params, url=request.url
)
except (AttributeError, ProgrammingError, StatementError) as e:
raise APIInternalError(
Expand Down Expand Up @@ -812,5 +828,5 @@ def _get(
result.update(self.daterange(query))
done = True
if not done:
result = self.datasets(request, aliases, json, query)
result = self.datasets(request, aliases, json, context["raw_params"], query)
return jsonify(result)
62 changes: 50 additions & 12 deletions lib/pbench/test/unit/server/test_datasets_list.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
from http import HTTPStatus
import re
from typing import Optional
from typing import Any, Optional

import pytest
import requests
Expand All @@ -10,7 +10,7 @@
from sqlalchemy.orm import aliased, Query

from pbench.server import JSON, JSONARRAY, JSONOBJECT
from pbench.server.api.resources import APIAbort
from pbench.server.api.resources import APIAbort, ApiParams
from pbench.server.api.resources.datasets_list import DatasetsList, urlencode_json
from pbench.server.database.database import Database
from pbench.server.database.models.datasets import Dataset, Metadata
Expand Down Expand Up @@ -129,17 +129,27 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
Returns:
Paginated JSON object containing list of dataset values
"""

def convert(k: str, v: Any) -> Any:
if isinstance(v, str) and k in ("filter", "sort", "metadata"):
return [v]
elif isinstance(v, int):
return str(v)
else:
return v

results: list[JSON] = []
offset = query.get("offset", 0)
offset = int(query.get("offset", "0"))
limit = query.get("limit")

if limit:
next_offset = offset + limit
next_offset = offset + int(limit)
paginated_name_list = name_list[offset:next_offset]
if next_offset >= len(name_list):
query["offset"] = str(len(name_list))
next_url = ""
else:
query["offset"] = next_offset
query["offset"] = str(next_offset)
next_url = (
f"http://localhost{server_config.rest_uri}/datasets?"
+ urlencode_json(query)
Expand All @@ -161,7 +171,15 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
},
}
)
return {"next_url": next_url, "results": results, "total": len(name_list)}
q1 = {k: convert(k, v) for k, v in query.items()}
if "metadata" not in q1:
q1["metadata"] = ["dataset.uploaded"]
return {
"parameters": q1,
"next_url": next_url,
"results": results,
"total": len(name_list),
}

def compare_results(
self, result: JSONOBJECT, name_list: list[str], query: JSON, server_config
Expand Down Expand Up @@ -190,8 +208,8 @@ def compare_results(
(None, {}, ["fio_1", "fio_2"]),
(None, {"access": "public"}, ["fio_1", "fio_2"]),
("drb", {"name": "fio"}, ["fio_1", "fio_2"]),
("drb", {"name": "fio", "limit": 1}, ["fio_1", "fio_2"]),
("drb", {"name": "fio", "limit": 1, "offset": 2}, ["fio_1", "fio_2"]),
("drb", {"name": "fio", "limit": "1"}, ["fio_1", "fio_2"]),
("drb", {"name": "fio", "limit": 1, "offset": "2"}, ["fio_1", "fio_2"]),
("drb", {"name": "fio", "offset": 1}, ["fio_1", "fio_2"]),
("drb", {"name": "fio", "offset": 2}, ["fio_1", "fio_2"]),
("drb", {"owner": "drb"}, ["drb", "fio_1"]),
Expand Down Expand Up @@ -256,6 +274,17 @@ def compare_results(
def test_dataset_list(self, server_config, query_as, login, query, results):
"""Test `datasets/list` filters
NOTE: Several of these queries use the "limit" and/or "offset" options
to test how the result set is segmented during pagination. These are
represented in the parametrization above interchangeably as integers or
strings. This is because (1) the actual input to the Pbench Server API
is always in string form as a URI query parameter but (2) the requests
package understands this and stringifies integer parameters while (3)
the Pbench Server API framework recognizes these are integer values and
presents them to the API code as integers. Mixing integer and string
representation here must have no impact on the operation of the API so
it's worth testing.
Args:
server_config: The PbenchServerConfig object
query_as: A fixture to provide a helper that executes the API call
Expand Down Expand Up @@ -311,7 +340,9 @@ def test_mine_novalue(self, server_config, client, more_datasets, get_token_func
headers=headers,
)
assert response.status_code == HTTPStatus.OK
self.compare_results(response.json, ["drb", "fio_1"], {}, server_config)
self.compare_results(
response.json, ["drb", "fio_1"], {"mine": ""}, server_config
)

@pytest.mark.parametrize(
"login,query,results",
Expand All @@ -336,7 +367,7 @@ def test_dataset_paged_list(self, query_as, login, query, results, server_config
results: A list of the dataset names we expect to be returned
server_config: The PbenchServerConfig object
"""
query.update({"metadata": ["dataset.uploaded"], "limit": 5})
query.update({"metadata": ["dataset.uploaded"], "limit": "5"})
result = query_as(query, login, HTTPStatus.OK)
self.compare_results(result.json, results, query, server_config)

Expand Down Expand Up @@ -384,6 +415,7 @@ def test_get_key_errors(self, query_as):
)
assert response.json == {
"next_url": "",
"parameters": {"metadata": ["global.test.foo"]},
"results": [
{
"metadata": {"global.test.foo": None},
Expand Down Expand Up @@ -444,6 +476,12 @@ def test_use_funk_metalog_keys(self, query_as):
)
assert response.json == {
"next_url": "",
"parameters": {
"filter": [
"dataset.metalog.iterations/[email protected]_name:~10"
],
"metadata": ["dataset.metalog.iterations/fooBar=10-what_else@weird"],
},
"results": [
{
"metadata": {
Expand Down Expand Up @@ -725,7 +763,7 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results):
"drb",
HTTPStatus.OK,
)
self.compare_results(response.json, results, {}, server_config)
self.compare_results(response.json, results, {"filter": query}, server_config)

@pytest.mark.parametrize(
"query,message",
Expand Down Expand Up @@ -769,7 +807,7 @@ def test_pagination_error(self, caplog, monkeypatch, query_as, exception, error)
"""

def do_error(
self, query: Query, json: JSONOBJECT, url: str
self, query: Query, json: JSONOBJECT, raw_params: ApiParams, url: str
) -> tuple[JSONARRAY, JSONOBJECT]:
raise exception

Expand Down

0 comments on commit 0a5a032

Please sign in to comment.