diff --git a/lib/pbench/server/api/resources/__init__.py b/lib/pbench/server/api/resources/__init__.py index 9e9db68fc7..b5ea4999aa 100644 --- a/lib/pbench/server/api/resources/__init__.py +++ b/lib/pbench/server/api/resources/__init__.py @@ -4,6 +4,7 @@ from http import HTTPStatus import json from json.decoder import JSONDecodeError +import re from typing import Any, Callable, Dict, List, NamedTuple, Optional, Union import uuid @@ -427,9 +428,11 @@ def convert_json(value: JSONOBJECT, parameter: "Parameter") -> JSONOBJECT: """Validate a parameter of JSON type. If the Parameter object defines a list of keywords, the JSON key values are - validated against that list. If the Parameter key_path attribute is set, - then only the first element of a dotted path (e.g., "user" for - "user.contact.email") is validated. + validated against that list. + + If the Parameter key_path attribute is set, validate that the key is legal + and if the value is a JSON object (dict) that all keys within the JSON are + legal. Args: value : JSON dict @@ -442,6 +445,25 @@ def convert_json(value: JSONOBJECT, parameter: "Parameter") -> JSONOBJECT: Returns: The JSON dict """ + _valid_key = re.compile(r"[a-z0-9_-]+") + + def keysafe(value: dict[str, Any]) -> list[str]: + """Test whether nested JSON keys are legal. + + Args: + value: A JSON structure + + Returns: + A list of bad keys (empty if all are good) + """ + bad = [] + for k, v in value.items(): + if not _valid_key.fullmatch(k): + bad.append(k) + if isinstance(v, dict): + bad.extend(keysafe(v)) + return bad + try: washed = json.loads(json.dumps(value)) except JSONDecodeError as e: @@ -454,12 +476,17 @@ def convert_json(value: JSONOBJECT, parameter: "Parameter") -> JSONOBJECT: if parameter.keywords: bad = [] - for k in value.keys(): - if parameter.key_path: - if not Metadata.is_key_path(k, parameter.keywords): + try: + for k, v in value.items(): + if parameter.key_path: + if not Metadata.is_key_path(k, parameter.keywords): + bad.append(k) + if isinstance(v, dict): + bad.extend(keysafe(v)) + elif k not in parameter.keywords: bad.append(k) - elif k not in parameter.keywords: - bad.append(k) + except Exception as e: + raise ConversionError(value, "JSON") from e if bad: raise KeywordError(parameter, f"JSON key{'s' if len(bad) > 1 else ''}", bad) diff --git a/lib/pbench/test/unit/server/test_datasets_metadata.py b/lib/pbench/test/unit/server/test_datasets_metadata.py index eb3e64e3dc..7e758a1f83 100644 --- a/lib/pbench/test/unit/server/test_datasets_metadata.py +++ b/lib/pbench/test/unit/server/test_datasets_metadata.py @@ -1,5 +1,4 @@ from http import HTTPStatus -import re import pytest import requests @@ -331,26 +330,27 @@ def test_put_no_dataset(self, client, server_config, attach_dataset): assert response.json == {"message": "Dataset 'foobar' not found"} @pytest.mark.parametrize( - "keys,message", + "keys,keyerr", ( ( {"xyzzy": "private", "what": "sup", "global.saved": True}, "'what', 'xyzzy'", ), + ({"global": {"Ab.foo": True}}, "'Ab.foo'"), + ({"global": {"ab@": True}}, "'ab@'"), + ({"global": {"abc": {"#$": "bad key"}}}, "'#$'"), ({"global.AbC@foo=y": True}, "'global.AbC@foo=y'"), ({"global..foo": True}, "'global..foo'"), ), ) - def test_put_bad_keys(self, client, server_config, attach_dataset, keys, message): + def test_put_bad_keys(self, client, server_config, attach_dataset, keys, keyerr): response = client.put( f"{server_config.rest_uri}/datasets/drb/metadata", json={"metadata": keys}, ) - assert response.status_code == HTTPStatus.BAD_REQUEST - assert re.match( - f"Unrecognized JSON keys? \\[{message}\\] for parameter metadata.", - response.json["message"], - ) + assert response.status_code == HTTPStatus.BAD_REQUEST, response.json["message"] + msg = response.json["message"] + assert "Unrecognized JSON key" in msg and keyerr in msg def test_put_reserved_metadata(self, client, server_config, attach_dataset): response = client.put( @@ -511,31 +511,41 @@ def test_put_set_errors(self, capinternal, monkeypatch, query_get_as, query_put_ def test_put(self, query_get_as, query_put_as): response = query_put_as( "drb", - {"metadata": {"global.seen": False, "global.saved": True}}, + {"metadata": {"global.seen": False, "global.under-score_hyphen": True}}, "drb", HTTPStatus.OK, ) assert response.json == { - "metadata": {"global.saved": True, "global.seen": False}, + "metadata": {"global.under-score_hyphen": True, "global.seen": False}, "errors": {}, } response = query_get_as( "drb", {"metadata": "global,dataset.access"}, "drb", HTTPStatus.OK ) assert response.json == { - "global": {"contact": "me@example.com", "saved": True, "seen": False}, + "global": { + "contact": "me@example.com", + "under-score_hyphen": True, + "seen": False, + }, "dataset.access": "private", } # Try a second GET, returning "global" fields separately: response = query_get_as( "drb", - {"metadata": ["global.seen", "global.saved", "dataset.access"]}, + { + "metadata": [ + "global.seen", + "global.under-score_hyphen", + "dataset.access", + ] + }, "drb", HTTPStatus.OK, ) assert response.json == { - "global.saved": True, + "global.under-score_hyphen": True, "global.seen": False, "dataset.access": "private", } @@ -565,7 +575,7 @@ def test_put(self, query_get_as, query_put_as): assert audit[1].user_name == "drb" assert audit[1].reason is None assert audit[1].attributes == { - "updated": {"global.seen": False, "global.saved": True} + "updated": {"global.seen": False, "global.under-score_hyphen": True} } def test_put_user(self, query_get_as, query_put_as): diff --git a/lib/pbench/test/unit/server/test_schema.py b/lib/pbench/test/unit/server/test_schema.py index e5bc8045e0..df8668b796 100644 --- a/lib/pbench/test/unit/server/test_schema.py +++ b/lib/pbench/test/unit/server/test_schema.py @@ -334,7 +334,8 @@ def test_boolean(self, value, expected): ("me.You.him", False, "me.you.him"), ("ME.US.HER.THEM", False, "me.us.her.them"), ("me.a.b.fooBar:test@y=z", True, "me.a.b.fooBar:test@y=z"), - ("yes.a7.b2.foo", False, "yes.a7.b2.foo"), + ("yes.a-7.b_2.foo", False, "yes.a-7.b_2.foo"), + ("yes.a_7.b-2.@foo", True, "yes.a_7.b-2.@foo"), ("me.;baR.#f0o.x@y=10", True, "me.;baR.#f0o.x@y=10"), ), ) @@ -353,8 +354,9 @@ def test_keyword_namespace(self, input, funky, expected): "input,funky", ( ("me..you", False), + ("me..you", True), ("me.#You.him", False), - ("them..us", True), + ("them.us", True), ("a.b.fooBar:test@y=z", True), ("wow.a7.b2.foo", False), ),