Skip to content

Commit

Permalink
More tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
dbutenhof committed Apr 20, 2023
1 parent fd563bb commit ced6fc5
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 22 deletions.
19 changes: 10 additions & 9 deletions docs/API/V1/list.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,24 +83,25 @@ substring "foo".
Multiple expressions may be combined across multiple `filter` query parameters
or as comma-separated lists in a single query parameter. Multiple filter
expressions are combined as an `AND` expression, matching only when all
expressions match. However a set of consecutive filter expressions can form
an `OR` expression by using the circumflex (`^`) "chain" character on each
expression term. The first expression with `^` begins an `OR` list while the
first subsequent expression without `^` ends the `OR` list and is combined with
the nested `OR` expression as an `AND`.
expressions match. However, any consecutive set of expressions starting with
`^` is collected into an "`OR` list" that will be `AND`-ed with the
surrounding terms.

For example,
- `filter=dataset.name:a,server.origin:EC2` returns datasets with a name of
"a" and an origin of "EC2".
- `filter=dataset.name:a,^server.origin:EC2,^dataset.metalog.pbench.script:fio`
returns datasets with a name of "a" and *either* an origin of "EC2" or generated
from the "pbench-fio" script.
- `filter=dataset.name:~andy,^server.origin:EC2,^server.origin:RIYA,
dataset.access:public`
returns only "public" datasets with a name containing the string "andy" which also
have an origin of either "EC2" or "RIYA". As a SQL query, we might write it
as `dataset.name like "%andy%" and (server.origin = 'EC2' or
server.origin = 'RIYA') and dataset.access = 'public'`.

_NOTE_: `filter` expression term values, like the `true` in
`GET /api/v1/datasets?filter=server.archiveonly:true`, are by default
interpreted as strings, so be careful about the string representation of the
value. In this case, `server.archiveonly` is a boolean, which will be matched
as a string value `true` or `false`. You can instead specify the expression
as a string value "true" or "false". You can instead specify the expression
term as `server.archiveonly:t:bool` which will treat the specified match value
as a boolean (`t[rue]` or `y[es]` for true, `f[alse]` or `n[o]` for false) and
match against the boolean metadata value.
Expand Down
25 changes: 17 additions & 8 deletions lib/pbench/server/api/resources/datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ def __init__(self, term: str):
self.buffer = term
self.logger = logging.getLogger("term")

@classmethod
def parse(cls, term: str) -> "Term":
"""Factory method to construct an instance and parse a string
Args:
term: A filter expression to parse
"""
return cls(term).parse_filter()

def _remove_prefix(
self, prefixes: tuple[str], default: Optional[str] = None
) -> Optional[str]:
Expand Down Expand Up @@ -188,7 +197,7 @@ def _next_token(self, optional: bool = False) -> str:
return next

def parse_filter(self) -> "Term":
"""Parse a filter term like "<key>:[<op>]value[:type]"
"""Parse a filter term like "<key>:[<op>]<value>[:<type>]"
Returns a dictionary with "key", "operator" (default "="), "value", and
"type" fields.
Expand Down Expand Up @@ -219,7 +228,7 @@ def parse_filter(self) -> "Term":
self.value = self._next_token(optional=True)

# The comparison type, defaults to "str"
self.type = self.buffer.lower()
self.type = self.buffer.lower() if self.buffer else "str"
if self.type and self.type not in TYPES:
raise APIAbort(
HTTPStatus.BAD_REQUEST,
Expand Down Expand Up @@ -376,9 +385,9 @@ def filter_query(
int Perform an integer match
bool Perform a boolean match (boolean values are t[rue], f[alse],
y[es], and n[o])
date Perform a date match: the selected key must have a representing
a date-time string (ISO-8601 preferred). UTC is assumed if no
timezone is specified.
date Perform a date match: the selected key value (and supplied
filter value) must be strings representing a date-time, ideally
in ISO-8601 format. UTC is assumed if no timezone is specified.
For example
Expand Down Expand Up @@ -440,11 +449,11 @@ def filter_query(
or_list = []
and_list = []
for kw in filters:
term = Term(kw).parse_filter()
term = Term.parse(kw)
combine_or = term.chain == "^"
keys = term.key.split(".")
native_key = keys.pop(0).lower()
vtype = term.type if term.type else "str"
vtype = term.type
value = TYPES[vtype].convert(term.value, None)
filter = None

Expand All @@ -465,7 +474,7 @@ def filter_query(
if not isinstance(c.type, TYPES[vtype].sqltype):
raise APIAbort(
HTTPStatus.BAD_REQUEST,
f"Type {vtype!r} of value {value!r} is not compatible with dataset column {c.name}",
f"Filter of type {vtype!r} is not compatible with key 'dataset.{c.name}'",
)
column = c
except AttributeError as e:
Expand Down
10 changes: 5 additions & 5 deletions lib/pbench/test/unit/server/test_datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results):
"""Verify DB engine behavior for mismatched metadata casts.
Verify that a typed filter ignores datasets where the metadata key
type isn't compatible with the implicit cast.
type isn't compatible with the required cast.
"""
drb = Dataset.query(name="drb")
fio_1 = Dataset.query(name="fio_1")
Expand All @@ -732,19 +732,19 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results):
[
(
"dataset.name:t:bool",
"Type 'bool' of value True is not compatible with dataset column name",
"Filter of type 'bool' is not compatible with key 'dataset.name'",
),
(
"dataset.uploaded:>2:int",
"Type 'int' of value 2 is not compatible with dataset column uploaded",
"Filter of type 'int' is not compatible with key 'dataset.uploaded'",
),
],
)
def test_mismatched_dataset_cast(self, query_as, server_config, query, message):
"""Verify DB engine behavior for mismatched metadata casts.
Verify that a typed filter ignores datasets where the metadata key
type isn't compatible with the implicit cast.
Verify that a typed filter generates an error when it targets a primary
dataset key with an incompatible type.
"""
response = query_as(
{"filter": query, "metadata": ["dataset.uploaded"]},
Expand Down

0 comments on commit ced6fc5

Please sign in to comment.