From 53c09bbf6d00d5be1841de94e48a6fea92651248 Mon Sep 17 00:00:00 2001 From: francisPLT Date: Thu, 13 Jul 2023 17:03:33 -0400 Subject: [PATCH 1/2] Substitute `parse_cwl_array_type` with `get_cwl_io_type` in `make_inputs` --- weaver/processes/wps_package.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index 07b11553e..4ac389041 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -83,12 +83,12 @@ from weaver.processes.convert import ( DEFAULT_FORMAT, cwl2wps_io, + get_cwl_io_type, json2wps_field, json2wps_io, merge_package_io, normalize_ordered_io, ogcapi2cwl_process, - parse_cwl_array_type, wps2json_io, xml_wps2cwl ) @@ -1858,7 +1858,7 @@ def make_inputs(self, # process single occurrences input_i = input_occurs[0] # handle as reference/data - io_def = parse_cwl_array_type(cwl_inputs_info[input_id]) + io_def = get_cwl_io_type(cwl_inputs_info[input_id]) if isinstance(input_i, ComplexInput) or io_def.type in PACKAGE_COMPLEX_TYPES: # extend array data that allow max_occur > 1 # drop invalid inputs returned as None From c3ab5580076e3ed55583f79e15e5d20318740631 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 14 Sep 2023 14:36:49 -0400 Subject: [PATCH 2/2] fix get_cwl_io_type to avoid modification of type CWL class reference --- CHANGES.rst | 6 +++ tests/processes/test_convert.py | 72 +++++++++++++++++++++++++-------- weaver/processes/convert.py | 1 + 3 files changed, 62 insertions(+), 17 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8df29b931..8bd04e26e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -18,6 +18,12 @@ Fixes: ------ - Fix missing Node.js requirement in built Docker image in order to evaluate definitions that employ `CWL` ``InlineJavascriptRequirement``, such as ``valueFrom`` employed for numeric ``Enum`` input type validation. +- Fix ``processes.wps_package.WpsPackage.make_inputs`` unable to parse multi-type `CWL` definitions due parsing + as single-type element with ``parse_cwl_array_type``. Function ``get_cwl_io_type`` is used instead to resolve any + `CWL` type combination properly. +- Fix ``get_cwl_io_type`` function that would modify the I/O definition passed as argument, which could lead to failing + `CWL` ``class`` reference resolutions later on due to different ``type`` with ``org.w3id.cwl.cwl`` prefix simplified + before ``cwltool`` had the chance to resolve them. .. _changes_4.31.0: diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index 44e31bf39..0f8f1601a 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -1,6 +1,8 @@ """ Unit tests of functions within :mod:`weaver.processes.convert`. """ +import copy + # pylint: disable=R1729 # ignore non-generator representation employed for displaying test log results import json @@ -708,7 +710,7 @@ def test_cwl2wps_io_record_format(): @pytest.mark.parametrize( - "io_type, io_info", + ["io_type", "io_info"], [ (WPS_LITERAL, {"type": WPS_LITERAL}), (WPS_COMPLEX, {"type": WPS_COMPLEX}), @@ -727,28 +729,64 @@ def test_get_io_type_category(io_type, io_info): assert get_io_type_category(io_info) == io_type, f"Testing: {io_info}" -@pytest.mark.parametrize("io_info, io_def", [ - ({"type": "string"}, - CWLIODefinition(type="string")), - ({"type": "int"}, - CWLIODefinition(type="int")), - ({"type": "float"}, - CWLIODefinition(type="float")), - ({"type": {"type": "enum", "symbols": ["a", "b", "c"]}}, - CWLIODefinition(type="string", enum=True, symbols=["a", "b", "c"], mode=MODE.SIMPLE)), - ({"type": {"type": "array", "items": "string"}}, - CWLIODefinition(type="string", array=True, min_occurs=1, max_occurs=PACKAGE_ARRAY_MAX_SIZE)), - ({"type": ["null", "string"]}, - CWLIODefinition(type="string", null=True, min_occurs=0)), - ({"type": "string?"}, - CWLIODefinition(type="string", null=True, min_occurs=0)), -]) +@pytest.mark.parametrize( + ["io_info", "io_def"], + [ + ({"type": "string"}, + CWLIODefinition(type="string")), + ({"type": "int"}, + CWLIODefinition(type="int")), + ({"type": "float"}, + CWLIODefinition(type="float")), + ({"type": {"type": "enum", "symbols": ["a", "b", "c"]}}, + CWLIODefinition(type="string", enum=True, symbols=["a", "b", "c"], mode=MODE.SIMPLE)), + ({"type": {"type": "array", "items": "string"}}, + CWLIODefinition(type="string", array=True, min_occurs=1, max_occurs=PACKAGE_ARRAY_MAX_SIZE)), + ({"type": ["null", "string"]}, + CWLIODefinition(type="string", null=True, min_occurs=0)), + ({"type": "string?"}, + CWLIODefinition(type="string", null=True, min_occurs=0)), + ] +) def test_get_cwl_io_type(io_info, io_def): io_def.name = io_info["name"] = "test" io_res = get_cwl_io_type(io_info) assert io_res == io_def +@pytest.mark.parametrize( + ["io_info", "io_def"], + [ + ( + { + "name": "test", + "type": "org.w3id.cwl.cwl.File", + "format": "https://www.iana.org/assignments/media-types/application/json", + "location": "/tmp/random.json", + }, + CWLIODefinition(name="test", type="File") + ) + ] +) +def test_get_cwl_io_type_unmodified(io_info, io_def): + """ + Ensure that the input I/O details do not cause a side effect modification of the data when parsing the definition. + + When :func:`get_cwl_io_type` was called with a definition containing ``type: org.w3id.cwl.cwl.File``, the resulting + parsing caused the input information to be overriden by ``type: File``. Although they are essentially equivalent + once resolved, this modification performed before :mod:`cwltool` had the time to parse the definition made it + incorrectly resolve ``class: File``, which in turn, caused :class:`cwltool.pathmapper.PathMapper` to be missing + the mapped ``location`` of provided inputs, leading to full :term:`CWL` execution failure. + + .. seealso:: + - https://github.com/crim-ca/weaver/pull/546 + """ + io_copy = copy.deepcopy(io_info) + io_res = get_cwl_io_type(io_info) + assert io_res == io_def + assert io_info == io_copy, "Argument I/O information should not be modified from parsing." + + def test_parse_cwl_array_type_explicit_invalid_item(): io_info = { "name": "test", diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index d79af1e5f..9016e0cb0 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -1337,6 +1337,7 @@ def get_cwl_io_type(io_info, strict=True): LOGGER.debug("I/O parsed for multiple base types") # parse single-definition + io_info = io_info.copy() io_info["type"] = io_type # override resolved multi-type base for more parsing io_name = io_info["name"] io_min_occurs = 0 if is_null else 1