From 83411ab0cbacd35e172d8fa3bf17f0139d711bf1 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 14 Apr 2023 17:36:21 -0400 Subject: [PATCH 1/6] Add typed filtering PBENCH-1124 Support type-cast filter expressions in `GET /datasets`. The primary objective is to support a paginated "Expiring Soon" view in the dashboard, requiring the ability to look for datasets expiring before a fixed timestamp. Previously, `GET /datasets?filter` worked by casting all SQL data to "string" and then comparing against the raw extracted string from the query parameter. Now it's possible to identify a type as well as additional comparison operators. For example, `GET /datasets?filter=server.deletion:<2023-05-01:date` will select all datasets with expiration timestamps earlier than 2023-05-1. To target this specific capability, the functional tests now override the default `server.deletion` on some uploads and verify that those datasets are returned by the filtered query. --- jenkins/run-server-func-tests | 13 +- lib/pbench/client/__init__.py | 13 + .../server/api/resources/datasets_list.py | 304 +++++++++++++++--- lib/pbench/test/functional/server/test_put.py | 92 +++++- .../test/unit/server/test_datasets_list.py | 101 +++++- 5 files changed, 449 insertions(+), 74 deletions(-) diff --git a/jenkins/run-server-func-tests b/jenkins/run-server-func-tests index d5eb268ad8..09708af384 100755 --- a/jenkins/run-server-func-tests +++ b/jenkins/run-server-func-tests @@ -21,11 +21,19 @@ elif [[ -n "${1}" ]]; then exit 2 fi +function dump_journal { + printf -- "+++ journalctl dump +++\n" + # Try to capture the functional test container's logs. + podman exec ${PB_SERVER_CONTAINER_NAME} journalctl + printf -- "\n--- journalctl dump ---\n\n" +} + function cleanup { if [[ -n "${cleanup_flag}" ]]; then # Remove the Pbench Server container and the dependencies pod which we # just created and ran; remove any dangling containers; and then remove # any dangling images. + dump_journal echo "Forcefully removing the Pbench Server container..." >&2 podman rm --force --ignore ${PB_SERVER_CONTAINER_NAME} echo "Forcefully removing the Pbench Support Services pod..." >&2 @@ -85,10 +93,7 @@ fi if [[ ${exit_status} -ne 0 ]]; then printf -- "\nFunctional tests exited with code %s\n" ${exit_status} - printf -- "+++ journalctl dump +++\n" - # Try to capture the functional test container's logs. - podman exec ${PB_SERVER_CONTAINER_NAME} journalctl - printf -- "\n--- journalctl dump ---\n\n" + dump_journal fi if [[ -z "${cleanup_flag}" ]]; then diff --git a/lib/pbench/client/__init__.py b/lib/pbench/client/__init__.py index 6015509644..c7135f9a14 100644 --- a/lib/pbench/client/__init__.py +++ b/lib/pbench/client/__init__.py @@ -489,3 +489,16 @@ def update( uri_params={"dataset": dataset_id}, params=params, ).json() + + def get_settings(self, key: Optional[str] = None) -> JSONOBJECT: + """Return requested metadata for a specified dataset. 
+ + Args: + dataset_id: the resource ID of the targeted dataset + metadata: a list of metadata keys to return + + Returns: + A JSON document containing the requested key values + """ + params = {"key": key} if key else None + return self.get(api=API.SERVER_SETTINGS, uri_params=params).json() diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index 58e1dae0ec..cf70fb2ddf 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -1,14 +1,16 @@ +from dataclasses import dataclass from http import HTTPStatus -from typing import Any +import logging +from typing import Any, Callable, Optional from urllib.parse import urlencode, urlparse from flask import current_app from flask.json import jsonify from flask.wrappers import Request, Response -from sqlalchemy import and_, asc, cast, desc, func, or_, String +from sqlalchemy import and_, asc, Boolean, cast, desc, func, Integer, or_, String from sqlalchemy.exc import ProgrammingError, StatementError from sqlalchemy.orm import aliased, Query -from sqlalchemy.sql.expression import Alias +from sqlalchemy.sql.expression import Alias, BinaryExpression, ColumnElement from pbench.server import JSON, JSONOBJECT, OperationCode, PbenchServerConfig from pbench.server.api.resources import ( @@ -20,12 +22,17 @@ ApiMethod, ApiParams, ApiSchema, + convert_boolean, + convert_date, + convert_int, + convert_string, Parameter, ParamType, Schema, ) import pbench.server.auth.auth as Auth from pbench.server.database.database import Database +from pbench.server.database.models import TZDateTime from pbench.server.database.models.datasets import ( Dataset, Metadata, @@ -34,6 +41,67 @@ ) +@dataclass +class Type: + """Link query filter type data + + Link the filter type name, the equivalent SQLAlchemy type, and the Pbench + API conversion method to create a compatible object from a string. + + Fields: + sqtype: The base SQLAlchemy type corresponding to a Python target type + convert: The Pbench API conversion method to verify and convert a + string + """ + + sqltype: Any + convert: Callable[[str, Any], Any] + + +"""Associate the name of a filter type to the Type record describing it.""" +TYPES = { + "bool": Type(Boolean, convert_boolean), + "date": Type(TZDateTime, convert_date), + "int": Type(Integer, convert_int), + "str": Type(String, convert_string), +} + + +"""Define the set of operators we allow in query filters. + +This maps a symbolic name to a SQLAlchemy filter method on the column. +""" +OPERATORS = { + "~": "contains", + "=": "__eq__", + "<": "__lt__", + ">": "__gt__", + "<=": "__le__", + ">=": "__ge__", + "!=": "__ne__", +} + + +def make_operator( + expression: ColumnElement, operator: str, value: Any +) -> BinaryExpression: + """Return the SQLAlchemy operator method of a column or JSON expression. + + Args: + expression: A SQLAlchemy expression or column + operator: The operator prefix + value: The value to be compared against + + Returns: + A SQLAlchemy filter expression. 
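+
+    For example (illustrative only), given the OPERATORS table above:
+
+        make_operator(Dataset.name, "~", "fio")  # -> Dataset.name.contains("fio")
+        make_operator(Dataset.name, "=", "fio")  # -> Dataset.name == "fio"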
+ """ + x = getattr(expression, OPERATORS[operator])(value) + current_app.logger.info( + "Expression type {} yields type {}", type(expression).__name__, type(x).__name__ + ) + return x + + def urlencode_json(json: JSON) -> str: """We must properly encode the metadata query parameter as a list of keys.""" new_json = {} @@ -42,6 +110,135 @@ def urlencode_json(json: JSON) -> str: return urlencode(new_json) +class Term: + """Help parsing a filter term.""" + + def __init__(self, term: str): + self.chain = None + self.key = None + self.value = None + self.term = term + self.buffer = term + self.logger = logging.getLogger("term") + + def _remove_prefix( + self, prefixes: tuple[str], default: Optional[str] = None + ) -> Optional[str]: + """Remove a string prefix. + + Looking at the current buffer, remove a known prefix before processing + the next character. A list of prefixes may be specified: they're sorted + longest first so that we don't accidentally match a substring. + + Args: + prefixes: a collection of prefix strings + default: the default prefix if none appears in the string. + + Returns: + The prefix, or the default. + """ + prefix = default + for p in sorted(prefixes, key=lambda k: len(k), reverse=True): + if self.buffer.startswith(p): + prefix = self.buffer[: len(p)] + self.buffer = self.buffer[len(p) :] + break + return prefix + + def _next_token(self, optional: bool = False) -> str: + """Extract a string from the head of the buffer. + + To filter using metadata keys (e.g., unusual benchmark generated + metadata.log keys) or values (e.g. a date/time string) which may + contain ":" symbols, the key or value may be enclosed in quotes. + Either single or double quotes are allowed. + + We're looking for the ":" symbol delimiting the next term of the filter + expression, or (if that's marked "optional") the end of the buffer. + + Args: + optional: The terminating ":" separator is optional. + + Returns: + The token, unquoted + """ + buffer = self.buffer + if buffer.startswith(("'", '"')): + quote = buffer[:1] + end = buffer.find(quote, 1) + if end < 0: + raise APIAbort( + HTTPStatus.BAD_REQUEST, f"Bad quote termination in {self.term!r}" + ) + next = buffer[1:end] + end += 1 + else: + end = buffer.find(":") + if end < 0: + if not optional: + raise APIAbort( + HTTPStatus.BAD_REQUEST, f"Missing terminator for {buffer!r}" + ) + next = buffer + end = len(buffer) + else: + next = buffer[:end] + buffer = buffer[end:] + if not optional and (len(buffer) == 0 or not buffer.startswith(":")): + self.logger.info( + "[optional %s, len %d, next %s]", optional, len(buffer), buffer[0] + ) + raise APIAbort(HTTPStatus.BAD_REQUEST, f"Missing terminator for {buffer!r}") + buffer = buffer[1:] + self.buffer = buffer + return next + + def parse_filter(self) -> "Term": + """Parse a filter term like "server.deletion:[]value:type" + + Returns a dictionary with "native_key", "full_key", "operator" (default + "="), "value", and "type" fields. + + The key and value can be quoted to include mixed case and symbols, + including the ":"character, by starting and starting and ending the key + or value with the "'" or '"' character, e.g., + "'dataset.metalog.tool/iteration:1':'foo:1#bar':str". 
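+
+        For example (illustrative), "server.deletion:<2023-05-01:date" parses
+        into key "server.deletion", operator "<", value "2023-05-01", and type
+        "date", while "^dataset.name:~fio" parses into chain "^", key
+        "dataset.name", operator "~", value "fio", and the default "str" type.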
+ + Returns: + self + """ + + # The "chain" allows chaining multiple expressions as an OR + self.chain = self._remove_prefix(("^",)) + + # The metadata key token + self.key = self._next_token() + if not Metadata.is_key_path( + self.key, Metadata.METADATA_KEYS, metalog_key_ok=True + ): + raise APIAbort(HTTPStatus.BAD_REQUEST, str(MetadataBadKey(self.key))) + + # The comparison operator (defaults to "=") + self.operator = self._remove_prefix(OPERATORS.keys(), default="=") + + # The filter value + self.value = self._next_token(optional=True) + + # The comparison type, defaults to "str" + self.type = self.buffer.lower() + if self.type and self.type not in TYPES: + raise APIAbort( + HTTPStatus.BAD_REQUEST, + f"The filter type {self.type!r} must be one of {','.join(t for t in TYPES.keys())}", + ) + if not self.key or not self.value: + raise APIAbort( + HTTPStatus.BAD_REQUEST, + f"filter {self.term!r} must have the form 'k:v[:type]'", + ) + return self + + class DatasetsList(ApiBase): """API class to list datasets based on database metadata.""" @@ -159,14 +356,41 @@ def filter_query( 'saved': True}` because there's no mechanism to parse JSON expressions. This might be added in the future. - To match against a key value, separate the key path from the value with - a `:` character, e.g., "dataset.name:fio". This produces an exact match - expression as an AND (required) term in the query: only datasets with - the exact name "fio" will be returned. - - To match against a subset of a key value, insert a tilde (~) following - the `:`, like "dataset.name:~fio". This produces a "contains" filter - that will match against any name containing the substring "fio". + Specify the value to match separated from the key path by a `:` + character, e.g., "dataset.name:fio". By default this produces an exact + match expression as an AND (required) term in the query: only datasets + with the exact name "fio" will be returned. + + The value may be prefixed by an optional operator in order to perform a + comparison other than strict equality: + + = Look for matches strictly equal to the specified value (default) + ~ Look for matches containing the specified value as a substring + > Look for matches strictly greater than the specified value + < Look for matches strictly less than the specified value + >= Look for matches greater than or equal to the specified value + <= Look for matches less than or equal to the specified value + != Look for matches not strictly equal to the specified value + + After the value, you can optionally specify a type for the comparison. + Note that an incompatible type (other than the default "str") for a + primary "dataset" column will be rejected, but general Metadata + (including "dataset.metalog") is stored as generic JSON and the API + will attempt to cast the selected data to the specified type. + + str Perform a string match + int Perform an integer match + bool Perform a boolean match (boolean values are t[rue], f[alse], + y[es], and n[o]) + date Perform a date match: the selected key must have a string value + in ISO-8601 format, and will be interpreted as UTC if no time + zone is specified. + + For example + + dataset.uploaded:>2023-05-01:date + global.dashboard.seen:t:bool + dataset.metalog.pbench.script:!=fio To produce an OR (optional) filter term, you can prefix the `key:value` with the caret (^) character. 
Adjacent OR terms will be combined in a @@ -177,8 +401,8 @@ def filter_query( AND and OR terms can be chained by ordering them: - "dataset.name:~fio,^global.dashboard.saved:true, - ^user.dashboard.favorite:true,server.origin:RIYA" + "dataset.name:~fio,^global.dashboard.saved:true:bool, + ^user.dashboard.favorite:true:bool,server.origin:RIYA" will match any dataset with a name containing the substring "fio" which ALSO has been marked in the dashboard as either "saved" OR (by the @@ -223,24 +447,13 @@ def filter_query( and_list = [] for kw in filters: combine_or = False - contains = False - try: - k, v = kw.split(":", maxsplit=1) - except ValueError: - raise APIAbort( - HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k:v'" - ) - if k.startswith("^"): + term = Term(kw).parse_filter() + if term.chain == "^": combine_or = True - k = k[1:] - if v.startswith("~"): - contains = True - v = v[1:] - - if not Metadata.is_key_path(k, Metadata.METADATA_KEYS, metalog_key_ok=True): - raise APIAbort(HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k))) - keys = k.split(".") + keys = term.key.split(".") native_key = keys.pop(0).lower() + vtype = term.type if term.type else "str" + value = TYPES[vtype].convert(term.value, None) filter = None if native_key == Metadata.DATASET: @@ -254,19 +467,20 @@ def filter_query( else: try: c = getattr(Dataset, second) - try: - is_str = c.type.python_type is str - except NotImplementedError: - is_str = False - if is_str: - column = c - else: + if vtype == "str" and not isinstance(c.type, String): column = cast(getattr(Dataset, second), String) + else: + if not isinstance(c.type, TYPES[vtype].sqltype): + raise APIAbort( + HTTPStatus.BAD_REQUEST, + f"Type {vtype} of value {value} is not compatible with dataset column {c.name}", + ) + column = c except AttributeError as e: raise APIAbort( - HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k)) + HTTPStatus.BAD_REQUEST, str(MetadataBadKey(term.key)) ) from e - filter = column.contains(v) if contains else column == v + filter = make_operator(column, term.operator, value) elif native_key == Metadata.USER: # The user namespace requires special handling because the # values are always qualified by the owning user rather than @@ -275,14 +489,20 @@ def filter_query( if not user_id: raise APIAbort( HTTPStatus.UNAUTHORIZED, - f"Metadata key {k} cannot be used by an unauthenticated client", + f"Metadata key {term.key} cannot be used by an unauthenticated client", ) # NOTE: We don't want to *evaluate* the filter expression here, so - # check explicitly for None. + # check explicitly for None. I.e., "we have no filter" rather than + # "the evaluated result of this filter is falsey". 
if filter is None: - expression = aliases[native_key].value[keys].as_string() - filter = expression.contains(v) if contains else expression == v + expression = ( + aliases[native_key] + .value[keys] + .as_string() + .cast(TYPES[vtype].sqltype) + ) + filter = make_operator(expression, term.operator, value) if combine_or: or_list.append(filter) diff --git a/lib/pbench/test/functional/server/test_put.py b/lib/pbench/test/functional/server/test_put.py index b28061ee4a..b5389890ca 100644 --- a/lib/pbench/test/functional/server/test_put.py +++ b/lib/pbench/test/functional/server/test_put.py @@ -1,11 +1,12 @@ from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from http import HTTPStatus import os.path from pathlib import Path import re import time +import dateutil.parser import pytest from requests.exceptions import HTTPError @@ -14,6 +15,11 @@ TARBALL_DIR = Path("lib/pbench/test/functional/server/tarballs") SPECIAL_DIR = TARBALL_DIR / "special" +SHORT_EXPIRATION_DAYS = 10 + + +def utc_from_str(date: str) -> datetime: + return dateutil.parser.parse(date).replace(tzinfo=timezone.utc) @dataclass @@ -32,17 +38,22 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user): """Upload each of the pregenerated tarballs, and perform some basic sanity checks on the resulting server state. """ - print(" ... reporting behaviors ...") + print(" ... uploading tarballs ...") tarballs: dict[str, Tarball] = {} access = ["private", "public"] cur_access = 0 + # We're going to make some datasets expire "soon" (much sooner than the + # default) so we can test filtering typed metadata. + expire_soon = datetime.now(timezone.utc) + timedelta(days=SHORT_EXPIRATION_DAYS) + for t in TARBALL_DIR.glob("*.tar.xz"): a = access[cur_access] if a == "public": metadata = ( - "server.origin:test,user.pbench.access:public,server.archiveonly:n" + "server.origin:test,user.pbench.access:public,server.archiveonly:n", + f"server.deletion:{expire_soon:%Y-%m-%d %H:%M%z}", ) else: metadata = None @@ -53,7 +64,7 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user): assert ( response.status_code == HTTPStatus.CREATED ), f"upload returned unexpected status {response.status_code}, {response.text} ({t})" - print(f"\t... uploaded {t.name}") + print(f"\t... uploaded {t.name}: {a}") datasets = server_client.get_list( metadata=["dataset.access", "server.tarball-path", "dataset.operations"] @@ -182,7 +193,7 @@ def check_indexed(server_client: PbenchServerClient, datasets): not_indexed = [] try: for dataset in datasets: - print(f"\t... on {dataset.metadata['server.tarball-path']}") + print(f"\t... on {dataset.name}") metadata = server_client.get_metadata( dataset.resource_id, ["dataset.operations"] ) @@ -217,8 +228,7 @@ def test_index_all(self, server_client: PbenchServerClient, login_user): state and metadata look good. """ tarball_names = frozenset(t.name for t in TARBALL_DIR.glob("*.tar.xz")) - - print(" ... reporting behaviors ...") + print(" ... reporting dataset status ...") # Test get_list pagination: to avoid forcing a list, we'll count the # iterations separately. 
(Note that this is really an implicit test @@ -233,6 +243,7 @@ def test_index_all(self, server_client: PbenchServerClient, login_user): "dataset.access", "server.archiveonly", "server.origin", + "server.deletion", "user.pbench.access", ], ) @@ -258,11 +269,11 @@ def test_index_all(self, server_client: PbenchServerClient, login_user): f" = {exc.response.status_code}, text = {exc.response.text!r}" ) - # For each dataset we find, poll the state until it reaches Indexed - # state, or until we time out. Since the cron jobs run once a minute - # and they start on the minute, we make our 1st check 45 seconds into - # the next minute, and then check at 45 seconds past the minute until - # we reached 5 minutes past the original start time. + # For each dataset we find, poll the state until it has been indexed, + # or until we time out. The indexer runs once a minute slightly after + # the minute, so we make our 1st check 45 seconds into the next minute, + # and then check at 45 seconds past the minute until we reach 5 minutes + # past the original start time. oneminute = timedelta(minutes=1) now = start = datetime.utcnow() timeout = start + timedelta(minutes=5) @@ -280,7 +291,7 @@ def test_index_all(self, server_client: PbenchServerClient, login_user): if os.path.basename(tp) not in tarball_names: continue count += 1 - print(f"\t... indexed {tp}") + print(f"\t... indexed {dataset.name}") now = datetime.utcnow() if not not_indexed or now >= timeout: break @@ -289,7 +300,7 @@ def test_index_all(self, server_client: PbenchServerClient, login_user): now = datetime.utcnow() assert not not_indexed, ( f"Timed out after {(now - start).total_seconds()} seconds; unindexed datasets: " - + ", ".join(d.metadata["server.tarball-path"] for d in not_indexed) + + ", ".join(d.name for d in not_indexed) ) assert ( len(tarball_names) == count @@ -421,6 +432,58 @@ def test_list_filter_and_or(self, server_client: PbenchServerClient, login_user) "2018" in date or "2019" in date ), f"Dataset {dataset.name} date is {date}" + @pytest.mark.dependency(name="list_type", depends=["upload"], scope="session") + def test_list_filter_typed(self, server_client: PbenchServerClient, login_user): + """Test filtering using typed matches. + + We compare the actual `server.deletion` time against what we believe we + set when uploading. Note that the metadata validator adds a full day to + the specified timestamp and then drops the time of day, effectively + rounding up to midnight, so here we add a day to the offset. + """ + test_sets = {} + for t in TARBALL_DIR.glob("*.tar.xz"): + r = Dataset.md5(t) + m = server_client.get_metadata( + r, + [ + "dataset.access", + "dataset.name", + "dataset.uploaded", + "server.deletion", + ], + ) + test_sets[r] = m + soonish = datetime.now(timezone.utc) + timedelta(days=20) + print( + f"... find matches for 'server.deletion:<{soonish:%Y-%m-%dT%H:%M%z}:date'" + ) + + datasets = server_client.get_list( + metadata=[ + "dataset.metalog.pbench.script", + "dataset.uploaded", + "server.deletion", + ], + owner="tester", + # NOTE: we use archaic "US standard" date notation here to minimize + # the risk of succeeding via a simple alphanumeric comparison. 
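+            # (The rendered filter looks something like
+            # "server.deletion:<'05/01/2023 00:00+0000':date"; the value is
+            # quoted because it contains ":" characters.)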
+ filter=[f"server.deletion:<'{soonish:%m/%d/%Y %H:%M%z}':date"], + ) + + for dataset in datasets: + deletion = utc_from_str(dataset.metadata["server.deletion"]) + assert ( + deletion < soonish + ), f"Filter returned {dataset.name}, with expiration out of range ({deletion:%Y-%m-%d})" + test_sets.pop(dataset.resource_id, None) + + for r, m in test_sets.items(): + deletion = utc_from_str(m["server.deletion"]) + assert ( + deletion >= soonish + ), f"Filter failed to return {m['dataset.name']}, with expiration in range ({deletion:%Y-%m-%d})" + class TestUpdate: @pytest.mark.dependency(name="publish", depends=["index"], scope="session") @@ -447,6 +510,7 @@ class TestDelete: "list_and", "list_none", "list_or", + "list_type", "publish", ], scope="session", diff --git a/lib/pbench/test/unit/server/test_datasets_list.py b/lib/pbench/test/unit/server/test_datasets_list.py index 2583d584f7..60692054ab 100644 --- a/lib/pbench/test/unit/server/test_datasets_list.py +++ b/lib/pbench/test/unit/server/test_datasets_list.py @@ -492,18 +492,58 @@ def test_get_repeat_keys(self, query_as): @pytest.mark.parametrize( "filters,expected", [ - (["dataset.name:fio"], "datasets.name = 'fio'"), + (["dataset.name:=fio"], "datasets.name = 'fio'"), ( - ["dataset.metalog.pbench.script:fio"], - "dataset_metadata_1.value[['pbench', 'script']] = 'fio'", + ["dataset.uploaded:>2023-01-01:date"], + "datasets.uploaded > '2023-01-01 00:00:00'", + ), + ( + ["dataset.uploaded:!=2023-01-01:date"], + "datasets.uploaded != '2023-01-01 00:00:00'", + ), + ( + ["server.deletion:<2023-05-01:date"], + "CAST(dataset_metadata_2.value[['deletion']] AS DATETIME) < '2023-05-01 00:00:00'", + ), + ( + ["'dataset.metalog.pbench.script':='fio'"], + "CAST(dataset_metadata_1.value[['pbench', 'script']] AS VARCHAR) = 'fio'", + ), + ( + ["'dataset.metalog.pbench.script':!=fio"], + "CAST(dataset_metadata_1.value[['pbench', 'script']] AS VARCHAR) != 'fio'", + ), + ( + ["dataset.metalog.run.date:~fio"], + "(CAST(dataset_metadata_1.value[['run', 'date']] AS VARCHAR) LIKE '%' || 'fio' || '%')", + ), + ( + ["global.something.integer:<1:int"], + "CAST(dataset_metadata_3.value[['something', 'integer']] AS INTEGER) < 1", + ), + ( + ["global.something.integer:>2:int"], + "CAST(dataset_metadata_3.value[['something', 'integer']] AS INTEGER) > 2", + ), + ( + ["global.something.integer:<=1:int"], + "CAST(dataset_metadata_3.value[['something', 'integer']] AS INTEGER) <= 1", + ), + ( + ["global.something.integer:>=2:int"], + "CAST(dataset_metadata_3.value[['something', 'integer']] AS INTEGER) >= 2", + ), + ( + ["global.something.boolean:t:bool"], + "CAST(dataset_metadata_3.value[['something', 'boolean']] AS BOOLEAN) = true", ), ( ["user.d.f:1"], - "dataset_metadata_4.value[['d', 'f']] = '1'", + "CAST(dataset_metadata_4.value[['d', 'f']] AS VARCHAR) = '1'", ), ( - ["dataset.name:~fio", "^global.x:1", "^user.y:~yes"], - "(datasets.name LIKE '%' || 'fio' || '%') AND (dataset_metadata_3.value[['x']] = '1' OR ((dataset_metadata_4.value[['y']]) LIKE '%' || 'yes' || '%'))", + ["dataset.name:~fio", "^'global.x':1", "^user.y:~yes"], + "(datasets.name LIKE '%' || 'fio' || '%') AND (CAST(dataset_metadata_3.value[['x']] AS VARCHAR) = '1' OR (CAST(dataset_metadata_4.value[['y']] AS VARCHAR) LIKE '%' || 'yes' || '%'))", ), ( ["dataset.uploaded:~2000"], @@ -544,11 +584,11 @@ def test_filter_query(self, monkeypatch, client, filters, expected): (["dataset.name:fio"], "datasets.name = 'fio'"), ( ["dataset.metalog.pbench.script:fio"], - "dataset_metadata_1.value[['pbench', 'script']] = 
'fio'", + "CAST(dataset_metadata_1.value[['pbench', 'script']] AS VARCHAR) = 'fio'", ), ( ["dataset.name:~fio", "^global.x:1"], - "(datasets.name LIKE '%' || 'fio' || '%') AND dataset_metadata_3.value[['x']] = '1'", + "(datasets.name LIKE '%' || 'fio' || '%') AND CAST(dataset_metadata_3.value[['x']] AS VARCHAR) = '1'", ), ( ["dataset.uploaded:~2000"], @@ -596,15 +636,47 @@ def test_user_no_auth(self, monkeypatch, db_session): assert e.value.http_status == HTTPStatus.UNAUTHORIZED @pytest.mark.parametrize( - "meta,error", + "meta,error,message", [ - ("user.foo:1", HTTPStatus.UNAUTHORIZED), - ("x.y:1", HTTPStatus.BAD_REQUEST), - ("global.x=3", HTTPStatus.BAD_REQUEST), - ("dataset.notright:10", HTTPStatus.BAD_REQUEST), + ( + "user.foo:1", + HTTPStatus.UNAUTHORIZED, + "Metadata key user.foo cannot be used by an unauthenticated client", + ), + ("x.y:1", HTTPStatus.BAD_REQUEST, "Metadata key 'x.y' is not supported"), + ( + "global.x=3", + HTTPStatus.BAD_REQUEST, + "Missing terminator for 'global.x=3'", + ), + ( + "dataset.notright:10", + HTTPStatus.BAD_REQUEST, + "Metadata key 'dataset.notright' is not supported", + ), + ( + "'dataset.name:foo", + HTTPStatus.BAD_REQUEST, + 'Bad quote termination in "\'dataset.name:foo"', + ), + ( + "dataset.name:'foo", + HTTPStatus.BAD_REQUEST, + 'Bad quote termination in "dataset.name:\'foo"', + ), + ( + "server.deletion:<2023-05-01:time", + HTTPStatus.BAD_REQUEST, + "The filter type 'time' must be one of bool,date,int,str", + ), + ( + "server.deletion:2023-05-01:date:", + HTTPStatus.BAD_REQUEST, + "The filter type 'date:' must be one of bool,date,int,str", + ), ], ) - def test_filter_errors(self, monkeypatch, db_session, meta, error): + def test_filter_errors(self, monkeypatch, client, meta, error, message): """Test invalid filter expressions.""" monkeypatch.setattr( "pbench.server.api.resources.datasets_list.Auth.get_current_user_id", @@ -614,6 +686,7 @@ def test_filter_errors(self, monkeypatch, db_session, meta, error): with pytest.raises(APIAbort) as e: DatasetsList.filter_query([meta], aliases, query) assert e.value.http_status == error + assert str(e.value) == message @pytest.mark.parametrize( "exception,error", From e3b016ee55560a4536c57348ca77fd1a5823fe1a Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Mon, 17 Apr 2023 07:47:56 -0400 Subject: [PATCH 2/6] Minor cleanup --- .../server/api/resources/datasets_list.py | 6 +--- lib/pbench/test/functional/server/test_put.py | 28 ++++++------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index cf70fb2ddf..618720e78e 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -95,11 +95,7 @@ def make_operator( Returns: A SQLAlchemy filter expression. 
""" - x = getattr(expression, OPERATORS[operator])(value) - current_app.logger.info( - "Expression type {} yields type {}", type(expression).__name__, type(x).__name__ - ) - return x + return getattr(expression, OPERATORS[operator])(value) def urlencode_json(json: JSON) -> str: diff --git a/lib/pbench/test/functional/server/test_put.py b/lib/pbench/test/functional/server/test_put.py index b5389890ca..a2de16f4c1 100644 --- a/lib/pbench/test/functional/server/test_put.py +++ b/lib/pbench/test/functional/server/test_put.py @@ -436,41 +436,30 @@ def test_list_filter_and_or(self, server_client: PbenchServerClient, login_user) def test_list_filter_typed(self, server_client: PbenchServerClient, login_user): """Test filtering using typed matches. - We compare the actual `server.deletion` time against what we believe we - set when uploading. Note that the metadata validator adds a full day to - the specified timestamp and then drops the time of day, effectively - rounding up to midnight, so here we add a day to the offset. + We set an early "server.deletion" time on alternating datasets as we + uploaded them. Now prove that we can use a typed "date" filter to find + them. We'll further validate that none of the other datasets we + uploaded from TARBALL_DIR should have been returned. """ test_sets = {} for t in TARBALL_DIR.glob("*.tar.xz"): r = Dataset.md5(t) m = server_client.get_metadata( r, - [ - "dataset.access", - "dataset.name", - "dataset.uploaded", - "server.deletion", - ], + ["dataset.name", "server.deletion"], ) test_sets[r] = m soonish = datetime.now(timezone.utc) + timedelta(days=20) - print( - f"... find matches for 'server.deletion:<{soonish:%Y-%m-%dT%H:%M%z}:date'" - ) datasets = server_client.get_list( - metadata=[ - "dataset.metalog.pbench.script", - "dataset.uploaded", - "server.deletion", - ], + metadata=["server.deletion"], owner="tester", - # NOTE: we use archaic "US standard" date notation here to minimize + # NOTE: using weird "US standard" date notation here minimizes # the risk of succeeding via a simple alphanumeric comparison. filter=[f"server.deletion:<'{soonish:%m/%d/%Y %H:%M%z}':date"], ) + # Confirm that the returned datasets match for dataset in datasets: deletion = utc_from_str(dataset.metadata["server.deletion"]) assert ( @@ -478,6 +467,7 @@ def test_list_filter_typed(self, server_client: PbenchServerClient, login_user): ), f"Filter returned {dataset.name}, with expiration out of range ({deletion:%Y-%m-%d})" test_sets.pop(dataset.resource_id, None) + # Confirm that the remaining TARBALL_DIR datasets don't match for r, m in test_sets.items(): deletion = utc_from_str(m["server.deletion"]) assert ( From 75ca0286766eea5b373602ddeca0a70974cb707f Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Tue, 18 Apr 2023 10:46:27 -0400 Subject: [PATCH 3/6] Updates --- docs/API/V1/list.md | 71 +++++++++++++------ jenkins/run-server-func-tests | 4 +- lib/pbench/client/__init__.py | 9 ++- .../server/api/resources/datasets_list.py | 50 ++++++------- lib/pbench/test/functional/server/test_put.py | 2 +- .../test/unit/server/test_datasets_list.py | 67 ++++++++++++++++- 6 files changed, 145 insertions(+), 58 deletions(-) diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index 7cb6872300..b7790dc339 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -45,20 +45,49 @@ specified date. `filter` metadata filtering \ Select datasets matching the metadata expressions specified via `filter` -query parameters. 
Each expression is the name of a metadata key (for example, -`dataset.name`), followed by a colon (`:`) and the comparison string. The -comparison string may be prefixed with a tilde (`~`) to make it a partial -("contains") comparison instead of an exact match. For example, -`dataset.name:foo` looks for datasets with the name "foo" exactly, whereas -`dataset.name:~foo` looks for datasets with a name containing the substring -"foo". - -These may be combined across multiple `filter` query parameters or as -comma-separated lists in a single query parameter. Multiple filter expressions -form an `AND` expression, however consecutive filter expressions can be joined -in an `OR` expression by using the circumflex (`^`) character prior to each. -(The first expression with `^` begins an `OR` list while the first subsequent -expression outout `^` ends the `OR` list and is combined with an `AND`.) +query parameters. Each expression has the format `[chain]key:[op]value[:type]`: + +* `chain` Prefix an expression with `^` (circumflex) to allow combining a set +of expressions with `OR` rather than the default `AND`. +* `key` The name of a metadata key (for example, `dataset.name`) + +* `op` An operator to specify how to compare the key value: + + * `=` (Default) Compare for equality + * `~` Compare against a substring + * `>` Greater than + * `<` Less than + * `>=` Greater than or equal to + * `<=` Less than or equal to + * `!=` Not equal + +* `value` The value to compare against. This will be interpreted based on the specified type. +* `type` The string value will be cast to this type. Any value can be cast to +type `str`. General metadata keys (`server`, `global`, `user`, and +`dataset.metalog` namespaces) that have values incompatible with the specified +type will be ignored. If you specify an incompatible type for a primary +`dataset` key, an error will be returned as these types are defined by the +Pbench schema so no match would be possible. (For example, `dataset.name:2:int` +or `dataset.access:2023-05-01:date`.) + + * `str` (Default) Compare as a string + * `bool` Compare as a boolean + * `int` Compare as an integer + * `date` Compare as a date-time string. ISO-8601 recommended, and UTC is + assumed if no timezone is specified. + +For example, `dataset.name:foo` looks for datasets with the name "foo" exactly, +whereas `dataset.name:~foo` looks for datasets with a name containing the +substring "foo". + +Multiple expressions may be combined across multiple `filter` query parameters +or as comma-separated lists in a single query parameter. Multiple filter +expressions are combined as an `AND` expression, matching only when all +expressions match. However a set of consecutive filter expressions can form +an `OR` expression by using the circumflex (`^`) "chain" character on each +expression term. The first expression with `^` begins an `OR` list while the +first subsequent expression without `^` ends the `OR` list and is combined with +the nested `OR` expression as an `AND`. For example, - `filter=dataset.name:a,server.origin:EC2` returns datasets with a name of @@ -67,12 +96,14 @@ For example, returns datasets with a name of "a" and *either* an origin of "EC2" or generated from the "pbench-fio" script. -_NOTE_: `filter` expression values, like the `true` in -`GET /api/v1/datasets?filter=server.archiveonly:true`, are always interpreted -as strings, so be careful about the string representation of the value (in this -case, a boolean, which is represented in JSON as `true` or `false`). 
Beware -especially when attempting to match a JSON document (such as -`dataset.metalog.pbench`). +_NOTE_: `filter` expression term values, like the `true` in +`GET /api/v1/datasets?filter=server.archiveonly:true`, are by default +interpreted as strings, so be careful about the string representation of the +value. In this case, `server.archiveonly` is a boolean, which will be matched +as a string value `true` or `false`. You can instead specify the expression +term as `server.archiveonly:t:bool` which will treat the specified match value +as a boolean (`t[rue]` or `y[es]` for true, `f[alse]` or `n[o]` for false) and +match against the boolean metadata value. `keysummary` boolean \ Instead of displaying a list of selected datasets and metadata, use the set of diff --git a/jenkins/run-server-func-tests b/jenkins/run-server-func-tests index 09708af384..ccd6a49647 100755 --- a/jenkins/run-server-func-tests +++ b/jenkins/run-server-func-tests @@ -33,7 +33,6 @@ function cleanup { # Remove the Pbench Server container and the dependencies pod which we # just created and ran; remove any dangling containers; and then remove # any dangling images. - dump_journal echo "Forcefully removing the Pbench Server container..." >&2 podman rm --force --ignore ${PB_SERVER_CONTAINER_NAME} echo "Forcefully removing the Pbench Support Services pod..." >&2 @@ -67,6 +66,7 @@ until curl -s -o /dev/null ${SERVER_API_ENDPOINTS}; do if [[ $(date +"%s") -ge ${end_in_epoch_secs} ]]; then echo "Timed out waiting for the reverse proxy to show up!" >&2 exit_status=1 + dump_journal exit ${exit_status} fi sleep 1 @@ -92,8 +92,8 @@ else fi if [[ ${exit_status} -ne 0 ]]; then - printf -- "\nFunctional tests exited with code %s\n" ${exit_status} dump_journal + printf -- "\nFunctional tests exited with code %s\n" ${exit_status} fi if [[ -z "${cleanup_flag}" ]]; then diff --git a/lib/pbench/client/__init__.py b/lib/pbench/client/__init__.py index c7135f9a14..d9358dcfa0 100644 --- a/lib/pbench/client/__init__.py +++ b/lib/pbench/client/__init__.py @@ -490,15 +490,14 @@ def update( params=params, ).json() - def get_settings(self, key: Optional[str] = None) -> JSONOBJECT: - """Return requested metadata for a specified dataset. + def get_settings(self, key: str = "") -> JSONOBJECT: + """Return requested server setting. Args: - dataset_id: the resource ID of the targeted dataset - metadata: a list of metadata keys to return + key: A server settings key; if omitted, return all settings Returns: A JSON document containing the requested key values """ - params = {"key": key} if key else None + params = {"key": key} return self.get(api=API.SERVER_SETTINGS, uri_params=params).json() diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index 618720e78e..a81825ce4e 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -43,10 +43,10 @@ @dataclass class Type: - """Link query filter type data + """Help keep track of filter types - Link the filter type name, the equivalent SQLAlchemy type, and the Pbench - API conversion method to create a compatible object from a string. + Used to link a filter type name to the equivalent SQLAlchemy type and the + Pbench API conversion method to create a compatible object from a string. 
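+
+    For example (illustrative), TYPES["date"] below pairs the TZDateTime
+    SQLAlchemy type with the convert_date conversion helper.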
Fields: sqtype: The base SQLAlchemy type corresponding to a Python target type @@ -112,7 +112,9 @@ class Term: def __init__(self, term: str): self.chain = None self.key = None + self.operator = None self.value = None + self.type = None self.term = term self.buffer = term self.logger = logging.getLogger("term") @@ -171,29 +173,25 @@ def _next_token(self, optional: bool = False) -> str: else: end = buffer.find(":") if end < 0: - if not optional: - raise APIAbort( - HTTPStatus.BAD_REQUEST, f"Missing terminator for {buffer!r}" - ) next = buffer end = len(buffer) else: next = buffer[:end] buffer = buffer[end:] - if not optional and (len(buffer) == 0 or not buffer.startswith(":")): - self.logger.info( - "[optional %s, len %d, next %s]", optional, len(buffer), buffer[0] + if buffer.startswith(":"): + buffer = buffer[1:] + elif not optional: + raise APIAbort( + HTTPStatus.BAD_REQUEST, f"Missing ':' terminator in {next!r}" ) - raise APIAbort(HTTPStatus.BAD_REQUEST, f"Missing terminator for {buffer!r}") - buffer = buffer[1:] self.buffer = buffer return next def parse_filter(self) -> "Term": - """Parse a filter term like "server.deletion:[]value:type" + """Parse a filter term like ":[]value[:type]" - Returns a dictionary with "native_key", "full_key", "operator" (default - "="), "value", and "type" fields. + Returns a dictionary with "key", "operator" (default "="), "value", and + "type" fields. The key and value can be quoted to include mixed case and symbols, including the ":"character, by starting and starting and ending the key @@ -366,7 +364,7 @@ def filter_query( < Look for matches strictly less than the specified value >= Look for matches greater than or equal to the specified value <= Look for matches less than or equal to the specified value - != Look for matches not strictly equal to the specified value + != Look for matches not equal to the specified value After the value, you can optionally specify a type for the comparison. Note that an incompatible type (other than the default "str") for a @@ -378,9 +376,9 @@ def filter_query( int Perform an integer match bool Perform a boolean match (boolean values are t[rue], f[alse], y[es], and n[o]) - date Perform a date match: the selected key must have a string value - in ISO-8601 format, and will be interpreted as UTC if no time - zone is specified. + date Perform a date match: the selected key must have a representing + a date-time string (ISO-8601 preferred). UTC is assumed if no + timezone is specified. For example @@ -442,10 +440,8 @@ def filter_query( or_list = [] and_list = [] for kw in filters: - combine_or = False term = Term(kw).parse_filter() - if term.chain == "^": - combine_or = True + combine_or = term.chain == "^" keys = term.key.split(".") native_key = keys.pop(0).lower() vtype = term.type if term.type else "str" @@ -469,7 +465,7 @@ def filter_query( if not isinstance(c.type, TYPES[vtype].sqltype): raise APIAbort( HTTPStatus.BAD_REQUEST, - f"Type {vtype} of value {value} is not compatible with dataset column {c.name}", + f"Type {vtype!r} of value {value!r} is not compatible with dataset column {c.name}", ) column = c except AttributeError as e: @@ -492,12 +488,8 @@ def filter_query( # check explicitly for None. I.e., "we have no filter" rather than # "the evaluated result of this filter is falsey". 
if filter is None: - expression = ( - aliases[native_key] - .value[keys] - .as_string() - .cast(TYPES[vtype].sqltype) - ) + expression = aliases[native_key].value[keys].as_string() + expression = expression.cast(TYPES[vtype].sqltype) filter = make_operator(expression, term.operator, value) if combine_or: diff --git a/lib/pbench/test/functional/server/test_put.py b/lib/pbench/test/functional/server/test_put.py index a2de16f4c1..f868c3ee30 100644 --- a/lib/pbench/test/functional/server/test_put.py +++ b/lib/pbench/test/functional/server/test_put.py @@ -449,7 +449,7 @@ def test_list_filter_typed(self, server_client: PbenchServerClient, login_user): ["dataset.name", "server.deletion"], ) test_sets[r] = m - soonish = datetime.now(timezone.utc) + timedelta(days=20) + soonish = datetime.now(timezone.utc) + timedelta(days=SHORT_EXPIRATION_DAYS * 2) datasets = server_client.get_list( metadata=["server.deletion"], diff --git a/lib/pbench/test/unit/server/test_datasets_list.py b/lib/pbench/test/unit/server/test_datasets_list.py index 60692054ab..616bfdf7c9 100644 --- a/lib/pbench/test/unit/server/test_datasets_list.py +++ b/lib/pbench/test/unit/server/test_datasets_list.py @@ -647,7 +647,17 @@ def test_user_no_auth(self, monkeypatch, db_session): ( "global.x=3", HTTPStatus.BAD_REQUEST, - "Missing terminator for 'global.x=3'", + "Missing ':' terminator in 'global.x=3'", + ), + ( + "'global.x'", + HTTPStatus.BAD_REQUEST, + "Missing ':' terminator in 'global.x'", + ), + ( + "dataset.x':v", + HTTPStatus.BAD_REQUEST, + 'Metadata key "dataset.x\'" is not supported', ), ( "dataset.notright:10", @@ -688,6 +698,61 @@ def test_filter_errors(self, monkeypatch, client, meta, error, message): assert e.value.http_status == error assert str(e.value) == message + @pytest.mark.parametrize( + "query,results", + [ + ("global.legacy:t:bool", ["drb"]), + ("global.legacy:>2:int", ["fio_2"]), + ("global.legacy:>2023-05-01:date", []), + ], + ) + def test_mismatched_json_cast(self, query_as, server_config, query, results): + """Verify DB engine behavior for mismatched metadata casts. + + Verify that a typed filter ignores datasets where the metadata key + type isn't compatible with the implicit cast. + """ + drb = Dataset.query(name="drb") + fio_1 = Dataset.query(name="fio_1") + fio_2 = Dataset.query(name="fio_2") + + Metadata.setvalue(dataset=drb, key="global.legacy", value=True) + Metadata.setvalue(dataset=fio_1, key="global.legacy.server", value="ABC") + Metadata.setvalue(dataset=fio_2, key="global.legacy", value=4) + + response = query_as( + {"filter": query, "metadata": ["dataset.uploaded"]}, + "drb", + HTTPStatus.OK, + ) + self.compare_results(response.json, results, {}, server_config) + + @pytest.mark.parametrize( + "query,message", + [ + ( + "dataset.name:t:bool", + "Type 'bool' of value True is not compatible with dataset column name", + ), + ( + "dataset.uploaded:>2:int", + "Type 'int' of value 2 is not compatible with dataset column uploaded", + ), + ], + ) + def test_mismatched_dataset_cast(self, query_as, server_config, query, message): + """Verify DB engine behavior for mismatched metadata casts. + + Verify that a typed filter ignores datasets where the metadata key + type isn't compatible with the implicit cast. 
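+
+        For example (illustrative), "dataset.name:t:bool" applies a "bool"
+        filter type to the string-typed primary column "dataset.name".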
+ """ + response = query_as( + {"filter": query, "metadata": ["dataset.uploaded"]}, + "drb", + HTTPStatus.BAD_REQUEST, + ) + assert response.json["message"] == message + @pytest.mark.parametrize( "exception,error", ( From fd563bbada910ebd1d95ce1d1705dd221d922e48 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Wed, 19 Apr 2023 16:01:02 -0400 Subject: [PATCH 4/6] Add `name` documentation! --- docs/API/V1/list.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index b7790dc339..3b4f550c64 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -136,6 +136,10 @@ Allows filtering for datasets owned by the authenticated client (if the value is omitted, e.g., `?mine` or `?mine=true`) or owned by *other* users (e.g., `?mine=false`). +`name` string \ +Select only datasets with a specified substring in their name. The filter +`?name=fio` is semantically equivalent to `?filter=dataset.name:~fio`. + `offset` integer \ "Paginate" the selected datasets by skipping the first `offset` datasets that would have been selected by the other query terms. This can be used with From ced6fc56a2d09bad5ca5efc1dc2c2e00b93c5e2d Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 20 Apr 2023 16:07:39 -0400 Subject: [PATCH 5/6] More tweaks --- docs/API/V1/list.md | 19 +++++++------- .../server/api/resources/datasets_list.py | 25 +++++++++++++------ .../test/unit/server/test_datasets_list.py | 10 ++++---- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index 3b4f550c64..66868118c0 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -83,24 +83,25 @@ substring "foo". Multiple expressions may be combined across multiple `filter` query parameters or as comma-separated lists in a single query parameter. Multiple filter expressions are combined as an `AND` expression, matching only when all -expressions match. However a set of consecutive filter expressions can form -an `OR` expression by using the circumflex (`^`) "chain" character on each -expression term. The first expression with `^` begins an `OR` list while the -first subsequent expression without `^` ends the `OR` list and is combined with -the nested `OR` expression as an `AND`. +expressions match. However any consecutive set of expressions starting with `^` +are collected into an "`OR` list" that will be `AND`-ed with the surrounding +terms. For example, - `filter=dataset.name:a,server.origin:EC2` returns datasets with a name of "a" and an origin of "EC2". -- `filter=dataset.name:a,^server.origin:EC2,^dataset.metalog.pbench.script:fio` -returns datasets with a name of "a" and *either* an origin of "EC2" or generated -from the "pbench-fio" script. +- `filter=dataset.name:~andy,^server.origin:EC2,^server.origin:RIYA, +dataset.access:public` +returns only "public" datasets with a name containing the string "andy" which also +have an origin of either "EC2" or "RIYA". As a SQL query, we might write it +as `dataset.name like "%andy%" and (server.origin = 'EC2' or +server.origin = 'RIYA') and dataset.access = 'public'`. _NOTE_: `filter` expression term values, like the `true` in `GET /api/v1/datasets?filter=server.archiveonly:true`, are by default interpreted as strings, so be careful about the string representation of the value. In this case, `server.archiveonly` is a boolean, which will be matched -as a string value `true` or `false`. You can instead specify the expression +as a string value "true" or "false". 
You can instead specify the expression term as `server.archiveonly:t:bool` which will treat the specified match value as a boolean (`t[rue]` or `y[es]` for true, `f[alse]` or `n[o]` for false) and match against the boolean metadata value. diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index a81825ce4e..76eed314f9 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -119,6 +119,15 @@ def __init__(self, term: str): self.buffer = term self.logger = logging.getLogger("term") + @classmethod + def parse(cls, term: str) -> "Term": + """Factory method to construct an instance and parse a string + + Args: + term: A filter expression to parse + """ + return cls(term).parse_filter() + def _remove_prefix( self, prefixes: tuple[str], default: Optional[str] = None ) -> Optional[str]: @@ -188,7 +197,7 @@ def _next_token(self, optional: bool = False) -> str: return next def parse_filter(self) -> "Term": - """Parse a filter term like ":[]value[:type]" + """Parse a filter term like ":[][:]" Returns a dictionary with "key", "operator" (default "="), "value", and "type" fields. @@ -219,7 +228,7 @@ def parse_filter(self) -> "Term": self.value = self._next_token(optional=True) # The comparison type, defaults to "str" - self.type = self.buffer.lower() + self.type = self.buffer.lower() if self.buffer else "str" if self.type and self.type not in TYPES: raise APIAbort( HTTPStatus.BAD_REQUEST, @@ -376,9 +385,9 @@ def filter_query( int Perform an integer match bool Perform a boolean match (boolean values are t[rue], f[alse], y[es], and n[o]) - date Perform a date match: the selected key must have a representing - a date-time string (ISO-8601 preferred). UTC is assumed if no - timezone is specified. + date Perform a date match: the selected key value (and supplied + filter value) must be strings representing a date-time, ideally + in ISO-8601 format. UTC is assumed if no timezone is specified. For example @@ -440,11 +449,11 @@ def filter_query( or_list = [] and_list = [] for kw in filters: - term = Term(kw).parse_filter() + term = Term.parse(kw) combine_or = term.chain == "^" keys = term.key.split(".") native_key = keys.pop(0).lower() - vtype = term.type if term.type else "str" + vtype = term.type value = TYPES[vtype].convert(term.value, None) filter = None @@ -465,7 +474,7 @@ def filter_query( if not isinstance(c.type, TYPES[vtype].sqltype): raise APIAbort( HTTPStatus.BAD_REQUEST, - f"Type {vtype!r} of value {value!r} is not compatible with dataset column {c.name}", + f"Filter of type {vtype!r} is not compatible with key 'dataset.{c.name}'", ) column = c except AttributeError as e: diff --git a/lib/pbench/test/unit/server/test_datasets_list.py b/lib/pbench/test/unit/server/test_datasets_list.py index 616bfdf7c9..6e15a162ba 100644 --- a/lib/pbench/test/unit/server/test_datasets_list.py +++ b/lib/pbench/test/unit/server/test_datasets_list.py @@ -710,7 +710,7 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results): """Verify DB engine behavior for mismatched metadata casts. Verify that a typed filter ignores datasets where the metadata key - type isn't compatible with the implicit cast. + type isn't compatible with the required cast. 
""" drb = Dataset.query(name="drb") fio_1 = Dataset.query(name="fio_1") @@ -732,19 +732,19 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results): [ ( "dataset.name:t:bool", - "Type 'bool' of value True is not compatible with dataset column name", + "Filter of type 'bool' is not compatible with key 'dataset.name'", ), ( "dataset.uploaded:>2:int", - "Type 'int' of value 2 is not compatible with dataset column uploaded", + "Filter of type 'int' is not compatible with key 'dataset.uploaded'", ), ], ) def test_mismatched_dataset_cast(self, query_as, server_config, query, message): """Verify DB engine behavior for mismatched metadata casts. - Verify that a typed filter ignores datasets where the metadata key - type isn't compatible with the implicit cast. + Verify that a typed filter generates an error when it targets a primary + dataset key with an incompatible type. """ response = query_as( {"filter": query, "metadata": ["dataset.uploaded"]}, From 2a59acb4c408ed1bb73d0065e951959e9bf3ec81 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 21 Apr 2023 07:24:12 -0400 Subject: [PATCH 6/6] A few tweaks more --- jenkins/run-server-func-tests | 2 +- lib/pbench/server/api/resources/datasets_list.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/jenkins/run-server-func-tests b/jenkins/run-server-func-tests index ccd6a49647..c55ab3417f 100755 --- a/jenkins/run-server-func-tests +++ b/jenkins/run-server-func-tests @@ -93,7 +93,7 @@ fi if [[ ${exit_status} -ne 0 ]]; then dump_journal - printf -- "\nFunctional tests exited with code %s\n" ${exit_status} + printf -- "\nFunctional tests exited with code %s\n" ${exit_status} >&2 fi if [[ -z "${cleanup_flag}" ]]; then diff --git a/lib/pbench/server/api/resources/datasets_list.py b/lib/pbench/server/api/resources/datasets_list.py index 76eed314f9..af49172bba 100644 --- a/lib/pbench/server/api/resources/datasets_list.py +++ b/lib/pbench/server/api/resources/datasets_list.py @@ -197,16 +197,20 @@ def _next_token(self, optional: bool = False) -> str: return next def parse_filter(self) -> "Term": - """Parse a filter term like ":[][:]" + """Parse a filter term like ":[][:]" - Returns a dictionary with "key", "operator" (default "="), "value", and - "type" fields. + Separates the "key", "operator", "value", and "type" fields of a filter + expression. The key and value can be quoted to include mixed case and symbols, - including the ":"character, by starting and starting and ending the key - or value with the "'" or '"' character, e.g., + including the ":" character, by starting and starting and ending the + key or value with the "'" or '"' character, e.g., "'dataset.metalog.tool/iteration:1':'foo:1#bar':str". + The "operator", if omitted, defaults to "=", and "type" defaults to + "str" (string), so that `dataset.name:foo` has the same effect as + `dataset.name:=foo:str`. + Returns: self """