Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support limited non-alphanumeric metadata keys #3354

Merged
merged 4 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 56 additions & 27 deletions lib/pbench/server/api/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from http import HTTPStatus
import json
from json.decoder import JSONDecodeError
import re
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Union
import uuid

Expand All @@ -25,7 +26,6 @@
Dataset,
DatasetNotFound,
Metadata,
MetadataBadKey,
MetadataError,
OperationName,
)
Expand Down Expand Up @@ -428,9 +428,11 @@ def convert_json(value: JSONOBJECT, parameter: "Parameter") -> JSONOBJECT:
"""Validate a parameter of JSON type.

If the Parameter object defines a list of keywords, the JSON key values are
validated against that list. If the Parameter key_path attribute is set,
then only the first element of a dotted path (e.g., "user" for
"user.contact.email") is validated.
validated against that list.

If the Parameter key_path attribute is set, validate that the key is legal
and if the value is a JSON object (dict) that all keys within the JSON are
legal.

Args:
value : JSON dict
Expand All @@ -443,6 +445,25 @@ def convert_json(value: JSONOBJECT, parameter: "Parameter") -> JSONOBJECT:
Returns:
The JSON dict
"""
_valid_key = re.compile(r"[a-z0-9_-]+")

def keysafe(value: dict[str, Any]) -> list[str]:
"""Test whether nested JSON keys are legal.

Args:
value: A JSON structure

Returns:
A list of bad keys (empty if all are good)
"""
bad = []
for k, v in value.items():
if not _valid_key.fullmatch(k):
bad.append(k)
if isinstance(v, dict):
bad.extend(keysafe(v))
return bad
webbnh marked this conversation as resolved.
Show resolved Hide resolved

try:
washed = json.loads(json.dumps(value))
except JSONDecodeError as e:
Expand All @@ -455,12 +476,17 @@ def convert_json(value: JSONOBJECT, parameter: "Parameter") -> JSONOBJECT:

if parameter.keywords:
bad = []
for k in value.keys():
if parameter.key_path:
if not Metadata.is_key_path(k, parameter.keywords):
try:
for k, v in value.items():
if parameter.key_path:
if not Metadata.is_key_path(k, parameter.keywords):
bad.append(k)
if isinstance(v, dict):
bad.extend(keysafe(v))
elif k not in parameter.keywords:
bad.append(k)
elif k not in parameter.keywords:
bad.append(k)
except Exception as e:
raise ConversionError(value, "JSON") from e
if bad:
raise KeywordError(parameter, f"JSON key{'s' if len(bad) > 1 else ''}", bad)

Expand Down Expand Up @@ -546,10 +572,12 @@ def convert_keyword(value: str, parameter: "Parameter") -> Union[str, Enum]:
except ValueError as e:
raise KeywordError(parameter, "enum", [value]) from e

keyword = value.lower()
keyword = value if parameter.metalog_ok else value.lower()
if not parameter.keywords:
return keyword
elif parameter.key_path and Metadata.is_key_path(keyword, parameter.keywords):
elif parameter.key_path and Metadata.is_key_path(
keyword, parameter.keywords, metalog_key_ok=parameter.metalog_ok
):
return keyword
elif keyword in parameter.keywords:
return keyword
Expand Down Expand Up @@ -715,6 +743,7 @@ def __init__(
key_path: bool = False,
string_list: Optional[str] = None,
enum: Optional[type[Enum]] = None,
metalog_ok: bool = False,
):
"""Initialize a Parameter object describing a JSON parameter with its
type and attributes.
Expand All @@ -731,6 +760,9 @@ def __init__(
will be split into lists.
enum : An Enum subclass to which an upcased keyword should be
converted.
metalog_ok : Normal key_path keywords are alphanumeric; in order to
accommodate `metadata.log` keys, we allow an expanded character
set in restricted cases.
"""
self.name = name
self.type = type
Expand All @@ -740,6 +772,7 @@ def __init__(
self.key_path = key_path
self.string_list = string_list
self.enum = enum
self.metalog_ok = metalog_ok

def invalid(self, json: JSONOBJECT) -> bool:
"""Check whether the value of this parameter in the JSON document is
Expand Down Expand Up @@ -1535,8 +1568,7 @@ def _build_sql_query(
def _get_dataset_metadata(
self, dataset: Dataset, requested_items: List[str]
) -> JSON:
"""Get requested metadata about a specific Dataset and return a JSON
fragment that can be added to other data about the Dataset.
"""Get requested metadata for a specific Dataset

This supports strict Metadata key/value items associated with the
Dataset as well as selected columns from the Dataset model.
Expand All @@ -1554,30 +1586,27 @@ def _get_dataset_metadata(

metadata = {}
for i in requested_items:
if Metadata.is_key_path(i, Metadata.METADATA_KEYS):
native_key = Metadata.get_native_key(i)
user_id = None
if native_key == Metadata.USER:
user_id = Auth.get_current_user_id()
try:
metadata[i] = Metadata.getvalue(
dataset=dataset, key=i, user_id=user_id
)
except MetadataError:
metadata[i] = None
else:
raise MetadataBadKey(i)
native_key = Metadata.get_native_key(i)
user_id = None
if native_key == Metadata.USER:
user_id = Auth.get_current_user_id()
try:
metadata[i] = Metadata.getvalue(dataset=dataset, key=i, user_id=user_id)
except MetadataError:
metadata[i] = None

return metadata

def _set_dataset_metadata(
self, dataset: Dataset, metadata: dict[str, JSONVALUE]
) -> dict[str, str]:
"""Set metadata on a specific Dataset and return a summary of failures.
"""Set metadata on a specific Dataset

This supports strict Metadata key/value items associated with the
Dataset as well as selected columns from the Dataset model.

Errors are collected and returned.

Args:
dataset: Dataset object
metadata: dict of key/value pairs
Expand Down
1 change: 1 addition & 0 deletions lib/pbench/server/api/resources/datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(self, config: PbenchServerConfig):
keywords=Metadata.METADATA_KEYS,
key_path=True,
string_list=",",
metalog_ok=True,
),
),
authorization=ApiAuthorizationType.USER_ACCESS,
Expand Down
1 change: 1 addition & 0 deletions lib/pbench/server/api/resources/datasets_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def __init__(self, config: PbenchServerConfig):
keywords=Metadata.METADATA_KEYS,
key_path=True,
string_list=",",
metalog_ok=True,
)
),
authorization=ApiAuthorizationType.DATASET,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(self, config: PbenchServerConfig):
keywords=Metadata.METADATA_KEYS,
key_path=True,
string_list=",",
metalog_ok=True,
),
),
authorization=ApiAuthorizationType.DATASET,
Expand Down
37 changes: 29 additions & 8 deletions lib/pbench/server/database/models/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,8 +779,25 @@ def is_key_path(key: str, valid: List[str], metalog_key_ok: bool = False) -> boo
def getvalue(
dataset: Dataset, key: str, user_id: Optional[str] = None
) -> Optional[JSON]:
"""Returns the value of the specified key, which may be a dotted
hierarchical path (e.g., "server.deleted").
"""Returns the value of the specified metadata key.

A metadata key may be a dotted hierarchical path, like "server.deleted"
or "dataset.metalog.pbench.script".

This code assumes that any necessary syntax checks have been made by a
caller. Normally, user key update is restricted to a specific set of
keys and namespaces (first level key like "server" or "global"), and
keys are lowercase and alphanumeric.

In the "dataset.metalog" sub-namespace, keys originate from a Pbench
Agent "metadata.log" config file, which may include non-alphanumeric
keys such as "iterations/1-tcp_stream-64B-1i". Pbench Server APIs may
webbnh marked this conversation as resolved.
Show resolved Hide resolved
allow access to these keys in limited circumstances for display and
filtering, and we don't interfere with that ability here. The namespace
key is always lowercased to find the SQL row; however other keys in the
path are opportunistically lowercased only if an exact match isn't
found. This allows "DATASET.SERVER" to match "dataset.server" but also
allows matching "dataset.metalog.iterations/1-tcp_stream-64B-1i".

The specific value of the dotted key path is returned, not the top
level Metadata object. The full JSON value of a top level key can be
Expand Down Expand Up @@ -812,10 +829,11 @@ def getvalue(
Returns:
Value of the key path
"""
if not Metadata.is_key_path(key, Metadata.METADATA_KEYS):
keys = key.split(".")
# Disallow ".." and trailing "."
if "" in keys:
raise MetadataBadKey(key)
keys = key.lower().split(".")
native_key = keys.pop(0)
native_key = keys.pop(0).lower()
if native_key == "dataset":
value = dataset.as_dict()
else:
Expand All @@ -831,10 +849,13 @@ def getvalue(
# the stored data; let the caller know with an exception.
if type(value) is not dict:
raise MetadataBadStructure(dataset, key, name)
if i not in value:
if i in value:
name = i
elif i.lower() in value:
name = i.lower()
else:
return None
value = value[i]
name = i
value = value[name]
return value

@staticmethod
Expand Down
26 changes: 25 additions & 1 deletion lib/pbench/test/unit/server/query_apis/test_datasets_detail.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pbench.server.api.resources.query_apis.datasets.datasets_detail import (
DatasetsDetail,
)
from pbench.server.database.models.datasets import Dataset, Metadata
from pbench.test.unit.server.conftest import generate_token
from pbench.test.unit.server.query_apis.commons import Commons

Expand Down Expand Up @@ -197,7 +198,29 @@ def test_metadata(
focus on the database dataset metadata... but necessarily has to borrow
much of the setup.
"""
query_params = {"metadata": ["global.seen", "server.deletion"]}

# In order to test that this API supports getting non-alphanumeric
# metalog metadata keys, we decorate the dataset with one. To keep it
# simple, just remove the existing Metadata row and create a new one.
drb = Dataset.query(name="drb")
Metadata.delete(Metadata.get(drb, Metadata.METALOG))
Metadata.create(
dataset=drb,
key=Metadata.METALOG,
value={
"iterations/fooBar=10-what_else@weird": {
"iteration_name": "fooBar=10-what_else@weird"
},
"run": {"controller": "node1.example.com"},
},
)
query_params = {
"metadata": [
"global.seen",
"server.deletion",
"dataset.metalog.iterations/[email protected]_name",
]
}

response_payload = {
"took": 112,
Expand Down Expand Up @@ -319,6 +342,7 @@ def test_metadata(
"serverMetadata": {
"global.seen": None,
"server.deletion": "2022-12-26",
"dataset.metalog.iterations/[email protected]_name": "fooBar=10-what_else@weird",
},
}
assert expected == res_json
Expand Down
61 changes: 56 additions & 5 deletions lib/pbench/test/unit/server/test_datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def test_dataset_paged_list(self, query_as, login, query, results, server_config
query: A JSON representation of the query parameters (these will be
automatically supplemented with a metadata request term)
results: A list of the dataset names we expect to be returned
server_config: The PbenchServerConfig object
"""
query.update({"metadata": ["dataset.uploaded"], "limit": 5})
result = query_as(query, login, HTTPStatus.OK)
Expand All @@ -319,10 +320,6 @@ def test_unauth_dataset_list(self, query_as):

Args:
query_as: A fixture to provide a helper that executes the API call
login: The username as which to perform a query
query: A JSON representation of the query parameters (these will be
automatically supplemented with a metadata request term)
results: A list of the dataset names we expect to be returned
"""
query_as({"access": "private"}, None, HTTPStatus.UNAUTHORIZED)

Expand All @@ -345,7 +342,7 @@ def test_get_bad_keys(self, query_as):
)
}

def test_get_get_errors(self, server_config, query_as):
def test_get_key_errors(self, query_as):
"""Test case reporting key errors

Args:
Expand Down Expand Up @@ -382,6 +379,60 @@ def test_get_get_errors(self, server_config, query_as):
"total": 3,
}

def test_use_funk_metalog_keys(self, query_as):
"""Test funky metadata.log keys

Normally we constrain metadata keys to lowercase alphanumeric strings.
Traditional Pbench Agent `metadata.log` files contain keys constructed
from benchmark iteration values that can contain mixed case and symbol
characters. We allow these keys to be filtered and retrieved, but not
created, so test that we can filter on a funky key value and return
the key.

Args:
query_as: Query helper fixture
"""
fio_1 = Dataset.query(name="fio_1")
Metadata.create(
dataset=fio_1,
key=Metadata.METALOG,
value={
"pbench": {
"date": "2020-02-15T00:00:00",
"config": "test1",
"script": "unit-test",
"name": "fio_1",
},
"iterations/fooBar=10-what_else@weird": {
"iteration_name": "fooBar=10-what_else@weird"
},
"run": {"controller": "node1.example.com"},
},
)
response = query_as(
{
"metadata": "dataset.metalog.iterations/fooBar=10-what_else@weird",
"filter": "dataset.metalog.iterations/[email protected]_name:~10",
webbnh marked this conversation as resolved.
Show resolved Hide resolved
},
"drb",
HTTPStatus.OK,
)
assert response.json == {
"next_url": "",
"results": [
{
"metadata": {
"dataset.metalog.iterations/fooBar=10-what_else@weird": {
"iteration_name": "fooBar=10-what_else@weird"
}
},
"name": "fio_1",
"resource_id": "random_md5_string3",
}
],
"total": 1,
}

def test_get_unknown_keys(self, query_as):
"""Test case requesting non-existent query parameter keys.

Expand Down
Loading