Refine simple type decoding and type annotations

- The fix for issue #356 requires a new bugfix release of the elementpath package (maybe v4.1.5)
sissaschool · Jul 25, 2023 · b743c0a · b743c0a
1 parent 4c23ad6
commit b743c0a
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 39 deletions.
diff --git a/.github/workflows/test-xmlschema.yml b/.github/workflows/test-xmlschema.yml
@@ -47,10 +47,10 @@ jobs:
       - name: Lint with mypy if Python version != 3.7
         if: ${{ matrix.python-version != '3.7' }}
         run: |
-          pip install mypy==1.3.0 elementpath==4.1.2 lxml-stubs
+          pip install mypy==1.4.1 elementpath==4.1.5 lxml-stubs
           mypy --show-error-codes --strict xmlschema
       - name: Lint with mypy if Python version == 3.7
         if: ${{ matrix.python-version == '3.7' }}
         run: |
-          pip install mypy==1.3.0 elementpath==4.1.2 lxml-stubs
+          pip install mypy==1.4.1 elementpath==4.1.5 lxml-stubs
           mypy --show-error-codes --no-warn-redundant-casts --no-warn-unused-ignores --strict xmlschema
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -2,7 +2,7 @@
 setuptools
 tox>=4.0
 coverage
-elementpath>=4.1.2, <5.0.0
+elementpath>=4.1.5, <5.0.0
 lxml
 jinja2
 memory_profiler

diff --git a/setup.py b/setup.py
@@ -32,13 +32,13 @@
         ]
     },
     python_requires='>=3.7',
-    install_requires=['elementpath>=4.1.2, <5.0.0'],
+    install_requires=['elementpath>=4.1.5, <5.0.0'],
     extras_require={
-        'codegen': ['elementpath>=4.1.2, <5.0.0', 'jinja2'],
-        'dev': ['tox', 'coverage', 'lxml', 'elementpath>=4.1.2, <5.0.0',
+        'codegen': ['elementpath>=4.1.5, <5.0.0', 'jinja2'],
+        'dev': ['tox', 'coverage', 'lxml', 'elementpath>=4.1.5, <5.0.0',
                 'memory_profiler', 'Sphinx', 'sphinx_rtd_theme', 'jinja2',
                 'flake8', 'mypy', 'lxml-stubs'],
-        'docs': ['elementpath>=4.1.2, <5.0.0', 'Sphinx', 'sphinx_rtd_theme', 'jinja2']
+        'docs': ['elementpath>=4.1.5, <5.0.0', 'Sphinx', 'sphinx_rtd_theme', 'jinja2']
     },
     author='Davide Brunato',
     author_email='[email protected]',

diff --git a/tests/validation/test_validation.py b/tests/validation/test_validation.py
@@ -355,6 +355,41 @@ def test_issue_350__ignore_xsi_type_for_schema_validation(self):
             'xsi:type="non-empty-string">foo</root>'
         ))
 
+    def test_issue_356__validate_empty_simple_elements(self):
+        schema = xmlschema.XMLSchema(dedent("""\
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+
+                <xs:element name="root1" type="emptyString" />
+                <xs:element name="root2" type="emptyList" />
+                <xs:element name="root3" type="emptiableUnion" />
+
+                <xs:simpleType name="emptyString">
+                    <xs:restriction base='xs:string'>
+                      <xs:length value="0"/>
+                    </xs:restriction>
+                </xs:simpleType>
+
+                <xs:simpleType name="emptyList">
+                    <xs:list itemType="emptyString"/>
+                </xs:simpleType>
+                
+                <xs:simpleType name="emptiableUnion">
+                    <xs:union memberTypes="xs:int emptyString"/>
+                </xs:simpleType>
+
+            </xs:schema>"""))
+
+        self.assertTrue(schema.is_valid('<root1></root1>'))
+        self.assertFalse(schema.is_valid('<root1>foo</root1>'))
+
+        self.assertTrue(schema.is_valid('<root2></root2>'))
+        self.assertFalse(schema.is_valid('<root2>foo</root2>'))
+        self.assertFalse(schema.is_valid('<root2>foo bar</root2>'))
+
+        self.assertTrue(schema.is_valid('<root3>1</root3>'))
+        self.assertTrue(schema.is_valid('<root3></root3>'))
+        self.assertFalse(schema.is_valid('<root3>foo</root3>'))
+
 
 class TestValidation11(TestValidation):
     schema_class = XMLSchema11

diff --git a/tox.ini b/tox.ini
@@ -6,7 +6,7 @@ work_dir = {tox_root}/../.tox/xmlschema
 
 [testenv]
 deps =
-    elementpath>=4.1.2, <5.0.0
+    elementpath>=4.1.5, <5.0.0
     lxml
     jinja2
     py{310,311}: memory_profiler
@@ -18,7 +18,7 @@ commands =
 
 [testenv:py312]
 deps =
-    elementpath>=4.1.2, <5.0.0
+    elementpath>=4.1.5, <5.0.0
     # lxml: skip for now
     jinja2
 
@@ -42,17 +42,17 @@ commands =
 
 [testenv:mypy-py37]
 deps =
-    mypy==1.3.0
-    elementpath==4.1.2
+    mypy==1.4.1
+    elementpath==4.1.5
     lxml-stubs
     jinja2
 commands =
     mypy --config-file {toxinidir}/mypy.ini xmlschema
 
 [testenv:mypy-py{38,39,310,311,312,py3}]
 deps =
-    mypy==1.3.0
-    elementpath==4.1.2
+    mypy==1.4.1
+    elementpath==4.1.5
     lxml-stubs
     jinja2
 commands =
@@ -69,10 +69,10 @@ commands =
 deps =
     pytest
     pytest-randomly
-    elementpath>=4.1.2, <5.0.0
+    elementpath>=4.1.5, <5.0.0
     lxml
     jinja2
-    mypy==1.3.0
+    mypy==1.4.1
     lxml-stubs
 commands =
     pytest tests -ra

diff --git a/xmlschema/aliases.py b/xmlschema/aliases.py
@@ -78,8 +78,9 @@
 
     ##
     # Type aliases for datatypes
-    AtomicValueType = Union[str, int, float, Decimal, bool, Integer, Float10, NormalizedString,
-                            AnyURI, HexBinary, Base64Binary, QName, Duration, OrderedDateTime, Time]
+    AtomicValueType = Union[str, bytes, int, float, Decimal, bool, Integer,
+                            Float10, NormalizedString, AnyURI, HexBinary,
+                            Base64Binary, QName, Duration, OrderedDateTime, Time]
     NumericValueType = Union[str, bytes, int, float, Decimal]
     DateTimeType = Union[OrderedDateTime, Time]
 
@@ -100,8 +101,9 @@
     JsonDecodeType = Union[str, None, Tuple[XMLSchemaValidationError, ...],
                            Tuple[Union[str, None], Tuple[XMLSchemaValidationError, ...]]]
 
-    DecodedValueType = Union[None, AtomicValueType, List[AtomicValueType]]
-    EncodedValueType = Union[None, str, List[str]]
+    DecodedValueType = Union[None, AtomicValueType, List[Optional[AtomicValueType]],
+                             XMLSchemaValidationError]
+    EncodedValueType = Union[None, str, List[str], XMLSchemaValidationError]
 
     FillerType = Callable[[Union[XsdElement, XsdAttribute]], Any]
     DepthFillerType = Callable[[XsdElement], Any]

diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py
@@ -923,15 +923,17 @@ def iter_components(self, xsd_classes: ComponentClassType = None) \
         if self.base_type.parent is not None:
             yield from self.base_type.iter_components(xsd_classes)
 
-    def iter_decode(self, obj: Union[str, bytes],  # type: ignore[override]
+    def iter_decode(self, obj: Union[str, bytes],
                     validation: str = 'lax', **kwargs: Any) \
-            -> IterDecodeType[List[DecodedValueType]]:
+            -> IterDecodeType[Union[XMLSchemaValidationError,
+                              List[Optional[AtomicValueType]]]]:
         items = []
         for chunk in self.normalize(obj).split():
             for result in self.base_type.iter_decode(chunk, validation, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
+                    assert not isinstance(result, list)
                     items.append(result)
         else:
             yield items
@@ -1084,28 +1086,31 @@ def iter_components(self, xsd_classes: ComponentClassType = None) \
         for mt in filter(lambda x: x.parent is not None, self.member_types):
             yield from mt.iter_components(xsd_classes)
 
-    def iter_decode(self, obj: Any, validation: str = 'lax',
+    def iter_decode(self, obj: AtomicValueType, validation: str = 'lax',
                     patterns: Optional[XsdPatternFacets] = None,
                     **kwargs: Any) -> IterDecodeType[DecodedValueType]:
 
-        # Try decoding the whole text
+        # Try decoding the whole text (or validate the decoded atomic value)
         for member_type in self.member_types:
             for result in member_type.iter_decode(obj, validation='lax', **kwargs):
                 if not isinstance(result, XMLSchemaValidationError):
-                    if patterns:
-                        obj = member_type.normalize(obj)
+                    if patterns and isinstance(obj, (str, bytes)):
                         try:
-                            patterns(obj)
+                            patterns(member_type.normalize(obj))
                         except XMLSchemaValidationError as err:
                             yield err
 
                     yield result
                     return
                 break
 
-        if ' ' not in obj.strip():
+        if isinstance(obj, bytes):
+            obj = obj.decode('utf-8')
+
+        if not isinstance(obj, str) or ' ' not in obj.strip():
             reason = _("invalid value {!r}").format(obj)
             yield XMLSchemaDecodeError(self, obj, self.member_types, reason)
+            return
 
         items = []
         not_decodable = []
@@ -1364,11 +1369,20 @@ def iter_components(self, xsd_classes: ComponentClassType = None) \
         if self.base_type.parent is not None:
             yield from self.base_type.iter_components(xsd_classes)
 
-    def iter_decode(self, obj: Union[str, bytes], validation: str = 'lax', **kwargs: Any) \
+    def iter_decode(self, obj: AtomicValueType, validation: str = 'lax', **kwargs: Any) \
             -> IterDecodeType[DecodedValueType]:
         if isinstance(obj, (str, bytes)):
             obj = self.normalize(obj)
 
+            if self.patterns:
+                if not isinstance(self.primitive_type, XsdUnion):
+                    try:
+                        self.patterns(obj)
+                    except XMLSchemaValidationError as err:
+                        yield err
+                elif 'patterns' not in kwargs:
+                    kwargs['patterns'] = self.patterns
+
         base_type: Any
         if isinstance(self.base_type, XsdSimpleType):
             base_type = self.base_type
@@ -1377,20 +1391,11 @@ def iter_decode(self, obj: Union[str, bytes], validation: str = 'lax', **kwargs:
         elif self.base_type.mixed:
             yield obj
             return
-        else:
+        else:  # pragma: no cover
             msg = _("wrong base type %r: a simpleType or a complexType "
                     "with simple or mixed content required")
             raise XMLSchemaValueError(msg % self.base_type)
 
-        if self.patterns:
-            if not isinstance(self.primitive_type, XsdUnion):
-                try:
-                    self.patterns(obj)
-                except XMLSchemaValidationError as err:
-                    yield err
-            elif 'patterns' not in kwargs:
-                kwargs['patterns'] = self.patterns
-
         for result in base_type.iter_decode(obj, validation, **kwargs):
             if isinstance(result, XMLSchemaValidationError):
                 yield result
@@ -1424,7 +1429,7 @@ def iter_encode(self, obj: Any, validation: str = 'lax', **kwargs: Any) \
             elif self.base_type.mixed:
                 yield str(obj)
                 return
-            else:
+            else:  # pragma: no cover
                 msg = _("wrong base type %r: a simpleType or a complexType "
                         "with simple or mixed content required")
                 raise XMLSchemaValueError(msg % self.base_type)