From 77271b60897dc1555198187c73df8afbcb54c8e9 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 5 Sep 2023 16:13:05 -0400 Subject: [PATCH 1/5] fix schema validation prohibiting job execution with empty string input --- CHANGES.rst | 2 ++ tests/wps_restapi/test_processes.py | 15 +++++++++ weaver/processes/convert.py | 6 ++-- weaver/utils.py | 2 +- weaver/wps_restapi/swagger_definitions.py | 38 +++++++++++++++-------- 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b0e3d3039..b369348d0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,6 +23,8 @@ Changes: Fixes: ------ - Fix broken `OpenAPI` schema link references to `OGC API - Processes` repository. +- Fix `Job` creation failing when submitting an empty string as input for a `Process` that allows it due + to schema validation incorrectly preventing it. .. _changes_4.30.1: diff --git a/tests/wps_restapi/test_processes.py b/tests/wps_restapi/test_processes.py index 7cd4f0788..90c37ddb8 100644 --- a/tests/wps_restapi/test_processes.py +++ b/tests/wps_restapi/test_processes.py @@ -1951,6 +1951,21 @@ def test_execute_process_no_json_body(self): assert resp.status_code == 400 assert resp.content_type == ContentType.APP_JSON + def test_execute_process_valid_empty_string(self): + """ + Ensure that a process expecting an input string parameter can be provided as empty (not resolved as "missing"). + """ + path = f"/processes/{self.process_public.identifier}/jobs" + data = self.get_process_execute_template(test_input="") + + with contextlib.ExitStack() as stack: + for exe in mocked_process_job_runner(): + stack.enter_context(exe) + resp = self.app.post_json(path, params=data, headers=self.json_headers) + assert resp.status_code == 201, "Expected job submission without inputs created without error." + job = self.job_store.fetch_by_id(resp.json["jobID"]) + assert job.inputs[0]["data"] == "", "Input value should be an empty string." + def test_execute_process_missing_required_params(self): """ Validate execution against missing parameters. diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 52e33b1de..58a216b5b 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -2861,10 +2861,10 @@ def wps2json_job_payload(wps_request, wps_process): for input_value in input_list: input_data = input_value.get("data") input_href = input_value.get("href") - if input_data: - data["inputs"].append({"id": iid, "data": input_data}) - elif input_href: + if input_href: # when href is provided, it must always be non-empty data["inputs"].append({"id": iid, "href": input_href}) + else: # no check if value to allow possible empty string, numeric zero or explicit null + data["inputs"].append({"id": iid, "data": input_data}) output_ids = list(wps_request.outputs) for output in wps_process.outputs: oid = output.identifier diff --git a/weaver/utils.py b/weaver/utils.py index b5d07d294..1e57a1748 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -3488,7 +3488,7 @@ def clean_json_text_body(body, remove_newlines=True, remove_indents=True): """ # cleanup various escape characters and u'' stings replaces = [(",\n", ", "), ("\\n", " "), (" \n", " "), ("\n'", "'"), ("\"", "\'"), - ("u\'", "\'"), ("u\"", "\'"), ("\'\'", "\'"), ("'. ", ""), ("'. '", ""), + ("u\'", "\'"), ("u\"", "\'"), ("'. ", ""), ("'. '", ""), ("}'", "}"), ("'{", "{")] if remove_indents: replaces.extend([("\\", " "), (" ", " ")]) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 11ba8097f..6d218b21a 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -1314,8 +1314,6 @@ class BoundingBoxInputType(ExtendedMappingSchema): supportedCRS = SupportedCRSList() -# FIXME: support byte/binary type (string + format:byte) ? -# https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/binaryInputValue.yaml class AnyLiteralType(OneOfKeywordSchema): """ Submitted values that correspond to literal data. @@ -1326,10 +1324,29 @@ class AnyLiteralType(OneOfKeywordSchema): - :class:`AnyLiteralDefaultType` """ _one_of = [ - ExtendedSchemaNode(Float(), description="Literal data type representing a floating point number."), - ExtendedSchemaNode(Integer(), description="Literal data type representing an integer number."), - ExtendedSchemaNode(Boolean(), description="Literal data type representing a boolean flag."), - ExtendedSchemaNode(String(), description="Literal data type representing a generic string."), + ExtendedSchemaNode( + Float(), + title="LiteralDataFloat", + description="Literal data type representing a floating point number.", + ), + ExtendedSchemaNode( + Integer(), + title="LiteralDataInteger", + description="Literal data type representing an integer number.", + ), + ExtendedSchemaNode( + Boolean(), + title="LiteralDataBoolean", + description="Literal data type representing a boolean flag.", + ), + ExtendedSchemaNode( + # FIXME: support byte/binary type (string + format:byte) ? + # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/binaryInputValue.yaml + # see if we can use 'encoding' parameter available for below 'String' schema-type to handle this? + String(allow_empty=True), # valid to submit a process with empty string + title="LiteralDataString", + description="Literal data type representing a generic string.", + ), ] @@ -3489,14 +3506,9 @@ class ExecuteInputFile(AnyOfKeywordSchema): # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/binaryInputValue.yaml # FIXME: does not support bbox # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/bbox.yaml -class ExecuteInputInlineValue(OneOfKeywordSchema): +class ExecuteInputInlineValue(AnyLiteralType): + title = "ExecuteInputInlineValue" description = "Execute input value provided inline." - _one_of = [ - ExtendedSchemaNode(Float(), title="ExecuteInputValueFloat"), - ExtendedSchemaNode(Integer(), title="ExecuteInputValueInteger"), - ExtendedSchemaNode(Boolean(), title="ExecuteInputValueBoolean"), - ExtendedSchemaNode(String(), title="ExecuteInputValueString"), - ] # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/inputValue.yaml From 64d1499f5a304ed7ca51e44686fb2abfa3d2a246 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 5 Sep 2023 16:25:28 -0400 Subject: [PATCH 2/5] test WPS endpoint to allow execute with empty string input --- tests/functional/test_wps_app.py | 20 ++++++++++++++++++++ weaver/store/mongodb.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/functional/test_wps_app.py b/tests/functional/test_wps_app.py index 92d047bb7..e681b8e5e 100644 --- a/tests/functional/test_wps_app.py +++ b/tests/functional/test_wps_app.py @@ -20,6 +20,7 @@ setup_config_with_celery, setup_config_with_mongodb, setup_config_with_pywps, + setup_mongodb_jobstore, setup_mongodb_processstore ) from weaver import xml_util @@ -45,6 +46,7 @@ def setUp(self): config = setup_config_with_pywps(config) config = setup_config_with_celery(config) self.process_store = setup_mongodb_processstore(config) + self.job_store = setup_mongodb_jobstore(config) self.app = get_test_weaver_app(config=config, settings=settings) # add processes by database Process type @@ -146,6 +148,24 @@ def test_execute_allowed_demo(self): resp.mustcontain(" Job """ Gets job for given ``job_id`` from `MongoDB` storage. From f650d25793d45160211fef71ad92ecb3c50f7482 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 6 Sep 2023 19:08:17 -0400 Subject: [PATCH 3/5] revert mongodb change --- weaver/store/mongodb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index dbb3fe1b8..43a56e5a2 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -891,7 +891,7 @@ def delete_job(self, job_id): result = self.collection.delete_one({"id": job_id}) return result.deleted_count == 1 - def c(self, job_id): + def fetch_by_id(self, job_id): # type: (AnyUUID) -> Job """ Gets job for given ``job_id`` from `MongoDB` storage. From f434e8648f60d2156adc4291ec6ce91f38d9e267 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 6 Sep 2023 20:26:09 -0400 Subject: [PATCH 4/5] fix JSON cleanup utility to handle empty string sequences of quotes --- CHANGES.rst | 3 ++- tests/test_owsexceptions.py | 33 ++++++++++++++++++-------------- weaver/utils.py | 38 ++++++++++++++++++++++++++++++------- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index aa8411b0d..e903507b6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,9 +23,10 @@ Changes: Fixes: ------ - Fix broken `OpenAPI` schema link references to `OGC API - Processes` repository. +- Fix ``GET /providers/{provider_id}`` response using ``$schema`` instead of ``$id`` to provide its content schema. - Fix `Job` creation failing when submitting an empty string as input for a `Process` that allows it due to schema validation incorrectly preventing it. -- Fix ``GET /providers/{provider_id}`` response using ``$schema`` instead of ``$id`` to provide its content schema. +- Fix human-readable `JSON`-like content cleanup to preserve sequences of quotes corresponding to valid empty strings. .. _changes_4.30.1: diff --git a/tests/test_owsexceptions.py b/tests/test_owsexceptions.py index 61c7d0171..1a2aba3e3 100644 --- a/tests/test_owsexceptions.py +++ b/tests/test_owsexceptions.py @@ -1,8 +1,11 @@ +import pytest + from weaver.owsexceptions import OWSException -def test_owsexceptions_json_formatter(): - test_cases = [ +@pytest.mark.parametrize( + ["test", "expect"], + [ ("\nLeading new-line", "Leading new-line."), ("\nnew-lines\n\neverywhere\n", @@ -19,6 +22,8 @@ def test_owsexceptions_json_formatter(): "Loads of dots remains..."), ("Contains some u''strings' not escaped", "Contains some 'strings' not escaped."), + ("Does not remove double quotes \"\" or '' that represent an empty string: {\"test\": \"\", \"other\": ''}.", + "Does not remove double quotes '' or '' that represent an empty string: {'test': '', 'other': ''}."), ("With \"double quotes\" not escaped.", "With 'double quotes' not escaped."), ("With \'single quotes\' not escaped.", @@ -37,17 +42,17 @@ def test_owsexceptions_json_formatter(): "Another long line, with many commas and newlines, but placed differently, just for the heck of it."), ("Literal new-lines are\\nresolved to space", "Literal new-lines are resolved to space."), ] - +) +def test_owsexceptions_json_formatter(test, expect): test_code = 418 test_status = f"{test_code} I'm a teapot" - for test, expect in test_cases: - json_body = OWSException.json_formatter(status=test_status, body=test, title="SomeCode", environ={}) - assert json_body["code"] == "SomeCode" - assert json_body["error"]["code"] == test_code - assert json_body["error"]["status"] == test_status - result = json_body["description"] - assert json_body["description"] == expect, ( - "Result does not match expected value" - f"\n Result: '{result}'" - f"\n Expect: '{expect}'" - ) + json_body = OWSException.json_formatter(status=test_status, body=test, title="SomeCode", environ={}) + assert json_body["code"] == "SomeCode" + assert json_body["error"]["code"] == test_code + assert json_body["error"]["status"] == test_status + result = json_body["description"] + assert json_body["description"] == expect, ( + "Result does not match expected value" + f"\n Result: '{result}'" + f"\n Expect: '{expect}'" + ) diff --git a/weaver/utils.py b/weaver/utils.py index 1e57a1748..9914f2446 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -3481,15 +3481,32 @@ def assert_sane_name(name, min_len=3, max_len=None): raise InvalidIdentifierValue(f"Invalid name : {name}") -def clean_json_text_body(body, remove_newlines=True, remove_indents=True): - # type: (str, bool, bool) -> str +def clean_json_text_body(body, remove_newlines=True, remove_indents=True, convert_quotes=True): + # type: (str, bool, bool, bool) -> str """ Cleans a textual body field of superfluous characters to provide a better human-readable text in a JSON response. """ # cleanup various escape characters and u'' stings - replaces = [(",\n", ", "), ("\\n", " "), (" \n", " "), ("\n'", "'"), ("\"", "\'"), - ("u\'", "\'"), ("u\"", "\'"), ("'. ", ""), ("'. '", ""), - ("}'", "}"), ("'{", "{")] + replaces = [ + (",\n", ", "), + ("\\n", " "), + (" \n", " "), + ("\n'", "'"), + ("u\'", "\'"), + ("u\"", "\""), + ("'. ", ""), + ("'. '", ""), + ("}'", "}"), + ("'{", "{"), + ] + patterns = [ + (re.compile(r"(\w+)('{2,})([\s\]\}\)]+)"), "\\1'\\3"), + (re.compile(r"([\s\[\{\(]+)('{2,})(\w+)"), "\\1'\\3"), + (re.compile(r"(\w+)(\"{2,})([\s\]\}\)]+)"), "\\1\"\\3"), + (re.compile(r"([\s\[\{\(]+)(\"{2,})(\w+)"), "\\1\"\\3"), + ] + if convert_quotes: + replaces.extend([("\"", "\'")]) if remove_indents: replaces.extend([("\\", " "), (" ", " ")]) else: @@ -3501,6 +3518,8 @@ def clean_json_text_body(body, remove_newlines=True, remove_indents=True): while any(rf in body for rf in replaces_from): for _from, _to in replaces: body = body.replace(_from, _to) + for _from, _to in patterns: + body = re.sub(_from, _to, body) if remove_newlines: body_parts = [p.strip() for p in body.split("\n") if p != ""] # remove new line and extra spaces @@ -3511,8 +3530,13 @@ def clean_json_text_body(body, remove_newlines=True, remove_indents=True): body_clean = body # re-process without newlines to remove escapes created by concat of lines - if any(rf in body_clean for rf in replaces_from): - body_clean = clean_json_text_body(body_clean, remove_newlines=remove_newlines, remove_indents=remove_indents) + if any(rf in body_clean if isinstance(rf, str) else re.match(rf, body) for rf in replaces_from): + body_clean = clean_json_text_body( + body_clean, + remove_newlines=remove_newlines, + remove_indents=remove_indents, + convert_quotes=convert_quotes, + ) return body_clean From 97e800ca3b1f886ab2e9c0f378c800c422518f6a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 7 Sep 2023 09:52:57 -0400 Subject: [PATCH 5/5] fix linting --- weaver/wps_restapi/swagger_definitions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 4e6bf5f39..23f3370e9 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -1340,6 +1340,7 @@ class AnyLiteralType(OneOfKeywordSchema): description="Literal data type representing a boolean flag.", ), ExtendedSchemaNode( + # pylint: disable=C0301,line-too-long # FIXME: support byte/binary type (string + format:byte) ? # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/binaryInputValue.yaml # see if we can use 'encoding' parameter available for below 'String' schema-type to handle this?