diff --git a/docs/source/about/changelog.rst b/docs/source/about/changelog.rst index a83b0cc29..8a8c5f614 100644 --- a/docs/source/about/changelog.rst +++ b/docs/source/about/changelog.rst @@ -25,6 +25,7 @@ Unreleased **Fixed** +- :issue:`777` - Fix edge cases where ``html_to_vdom`` can fail to convert HTML - :issue:`789` - Conditionally rendered components cannot use contexts - :issue:`773` - Use strict equality check for text, numeric, and binary types in hooks - :issue:`801` - Accidental mutation of old model causes invalid JSON Patch @@ -38,6 +39,7 @@ Unreleased **Added** - :pull:`123` - ``asgiref`` as a dependency +- :pull:`795` - ``lxml`` as a dependency v0.39.0 diff --git a/requirements/pkg-deps.txt b/requirements/pkg-deps.txt index f13b33bf9..5e4835f12 100644 --- a/requirements/pkg-deps.txt +++ b/requirements/pkg-deps.txt @@ -6,3 +6,4 @@ fastjsonschema >=2.14.5 requests >=2 colorlog >=6 asgiref >=3 +lxml >= 4 diff --git a/src/idom/backend/utils.py b/src/idom/backend/utils.py index b891ec793..35e4e75dd 100644 --- a/src/idom/backend/utils.py +++ b/src/idom/backend/utils.py @@ -35,7 +35,7 @@ def run( implementation: BackendImplementation[Any] | None = None, ) -> None: """Run a component with a development server""" - logger.warn( + logger.warning( "You are running a development server. " "Change this before deploying in production!" 
) diff --git a/src/idom/utils.py b/src/idom/utils.py index e8f9cfd01..ec114b2c3 100644 --- a/src/idom/utils.py +++ b/src/idom/utils.py @@ -1,8 +1,15 @@ -from html.parser import HTMLParser as _HTMLParser -from typing import Any, Callable, Dict, Generic, List, Optional, Tuple, TypeVar +from itertools import chain +from typing import Any, Callable, Generic, Iterable, List, TypeVar, Union + +from lxml import etree +from lxml.html import fragments_fromstring + +import idom +from idom.core.types import VdomDict _RefValue = TypeVar("_RefValue") +_ModelTransform = Callable[[VdomDict], Any] _UNDEFINED: Any = object() @@ -49,11 +56,9 @@ def __repr__(self) -> str: return f"{type(self).__name__}({current})" -_ModelTransform = Callable[[Dict[str, Any]], Any] - - -def html_to_vdom(source: str, *transforms: _ModelTransform) -> Dict[str, Any]: - """Transform HTML into a DOM model +def html_to_vdom(html: str, *transforms: _ModelTransform, strict: bool = True) -> VdomDict: + """Transform HTML into a DOM model. Unique keys can be provided to HTML elements + using a ``key=...`` attribute within your HTML tag. Parameters: source: @@ -62,81 +67,154 @@ def html_to_vdom(source: str, *transforms: _ModelTransform) -> Dict[str, Any]: Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM dictionary which will be replaced by ``new``. For example, you could use a transform function to add highlighting to a ```` block. + strict: + If ``True``, raise an exception if the HTML does not perfectly follow HTML5 + syntax. 
""" - parser = HtmlParser() - parser.feed(source) - root = parser.model() - to_visit = [root] - while to_visit: - node = to_visit.pop(0) - if isinstance(node, dict) and "children" in node: - transformed = [] - for child in node["children"]: - if isinstance(child, dict): - for t in transforms: - child = t(child) - if child is not None: - transformed.append(child) - to_visit.append(child) - node["children"] = transformed - if "attributes" in node and not node["attributes"]: - del node["attributes"] - if "children" in node and not node["children"]: - del node["children"] - return root - - -class HtmlParser(_HTMLParser): - """HTML to VDOM parser - - Example: - - .. code-block:: - - parser = HtmlParser() - - parser.feed(an_html_string) - parser.feed(another_html_string) - ... - - vdom = parser.model() + if not isinstance(html, str): # pragma: no cover + raise TypeError(f"Expected html to be a string, not {type(html).__name__}") + + # If the user provided a string, convert it to a list of lxml.etree nodes + parser = etree.HTMLParser( + remove_comments=True, + remove_pis=True, + remove_blank_text=True, + recover=not strict, + ) + try: + nodes: List = fragments_fromstring(html, no_leading_text=True, parser=parser) + except etree.XMLSyntaxError as e: + if not strict: + raise e # pragma: no cover + raise HTMLParseError( + "An error has occurred while parsing the HTML.\n\n" + "This HTML may be malformatted, or may not perfectly adhere to HTML5.\n" + "If you believe the exception above was due to something intentional, " + "you can disable the strict parameter on html_to_vdom().\n" + "Otherwise, repair your broken HTML and try again." 
+ ) from e + has_root_node = len(nodes) == 1 + + # Find or create a root node + if has_root_node: + root_node = nodes[0] + else: + # etree.Element requires a non-empty tag - we correct this below + root_node = etree.Element("TEMP", None, None) + for child in nodes: + root_node.append(child) + + # Convert the lxml node to a VDOM dict + vdom = _etree_to_vdom(root_node, transforms) + + # Change the artificially created root node to a React Fragment, instead of a div + if not has_root_node: + vdom["tagName"] = "" + + return vdom + + +def _etree_to_vdom( + node: etree._Element, transforms: Iterable[_ModelTransform] +) -> VdomDict: + """Recursively transform an lxml etree node into a DOM model + + Parameters: + node: + The ``lxml.etree._Element`` node + transforms: + Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM + dictionary which will be replaced by ``new``. For example, you could use a + transform function to add highlighting to a ```` block. """ + if not isinstance(node, etree._Element): # pragma: no cover + raise TypeError( + f"Expected node to be a etree._Element, not {type(node).__name__}" + ) - def model(self) -> Dict[str, Any]: - """Get the current state of parsed VDOM model""" - return self._node_stack[0] - - def feed(self, data: str) -> None: - """Feed in HTML that will update the :meth:`HtmlParser.model`""" - self._node_stack.append(self._make_vdom("div", {})) - super().feed(data) - - def reset(self) -> None: - """Reset the state of the parser""" - self._node_stack: List[Dict[str, Any]] = [] - super().reset() - - def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: - new = self._make_vdom(tag, dict(attrs)) - current = self._node_stack[-1] - current["children"].append(new) - self._node_stack.append(new) - - def handle_endtag(self, tag: str) -> None: - del self._node_stack[-1] - - def handle_data(self, data: str) -> None: - self._node_stack[-1]["children"].append(data) - - @staticmethod - def 
_make_vdom(tag: str, attrs: Dict[str, Any]) -> Dict[str, Any]: - if "style" in attrs: - style = attrs["style"] - if isinstance(style, str): - style_dict = {} - for k, v in (part.split(":", 1) for part in style.split(";") if part): - title_case_key = k.title().replace("-", "") - camel_case_key = title_case_key[:1].lower() + title_case_key[1:] - style_dict[camel_case_key] = v - attrs["style"] = style_dict - return {"tagName": tag, "attributes": attrs, "children": []} + # This will recursively call _etree_to_vdom() on all children + children = _generate_vdom_children(node, transforms) + + # Convert the lxml node to a VDOM dict + attributes = dict(node.items()) + key = attributes.pop("key", None) + + if hasattr(idom.html, node.tag): + vdom = getattr(idom.html, node.tag)(attributes, *children, key=key) + else: + vdom: VdomDict = {"tagName": node.tag} + if children: + vdom["children"] = children + if attributes: + vdom["attributes"] = attributes + if key is not None: + vdom["key"] = key + + # Perform any necessary mutations on the VDOM attributes to meet VDOM spec + _mutate_vdom(vdom) + + # Apply any provided transforms. + for transform in transforms: + vdom = transform(vdom) + + return vdom + + +def _mutate_vdom(vdom: VdomDict): + """Performs any necessary mutations on the VDOM attributes to meet VDOM spec. + + Currently, this function only transforms the ``style`` attribute into a dictionary whose keys are + camelCase so as to be renderable by React. + + This function may be extended in the future. 
+ """ + # Determine if the style attribute needs to be converted to a dict + if ( + "attributes" in vdom + and "style" in vdom["attributes"] + and isinstance(vdom["attributes"]["style"], str) + ): + # Convince type checker that it's safe to mutate attributes + assert isinstance(vdom["attributes"], dict) + + # Convert style attribute from str -> dict with camelCase keys + vdom["attributes"]["style"] = { + _hypen_to_camel_case(key.strip()): value.strip() + for key, value in ( + part.split(":", 1) + for part in vdom["attributes"]["style"].split(";") + if ":" in part + ) + } + + +def _generate_vdom_children( + node: etree._Element, transforms: Iterable[_ModelTransform] +) -> List[Union[VdomDict, str]]: + """Generates a list of VDOM children from an lxml node. + + Inserts inner text and/or tail text in between VDOM children, if necessary. + """ + return ( # Get the inner text of the current node + [node.text] if node.text else [] + ) + list( + chain( + *( + # Recursively convert each child node to VDOM + [_etree_to_vdom(child, transforms)] + # Insert the tail text between each child node + + ([child.tail] if child.tail else []) + for child in node.iterchildren(None) + ) + ) + ) + + +def _hypen_to_camel_case(string: str) -> str: + """Convert a hyphenated string to camelCase.""" + first, _, remainder = string.partition("-") + return first.lower() + remainder.title().replace("-", "") + + +class HTMLParseError(etree.LxmlSyntaxError): + """Raised when an HTML document cannot be parsed using strict parsing.""" diff --git a/src/idom/widgets.py b/src/idom/widgets.py index a089b9d21..b66e89348 100644 --- a/src/idom/widgets.py +++ b/src/idom/widgets.py @@ -80,7 +80,7 @@ def use_linked_inputs( value, set_value = idom.hooks.use_state(initial_value) def sync_inputs(event: Dict[str, Any]) -> None: - new_value = event["value"] + new_value = event["target"]["value"] set_value(new_value) if not new_value and ignore_empty: return None diff --git a/tests/test_utils.py b/tests/test_utils.py 
index cca97a0ac..861fc315d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,7 @@ import pytest import idom -from idom.utils import html_to_vdom +from idom.utils import HTMLParseError, html_to_vdom def test_basic_ref_behavior(): @@ -60,18 +60,15 @@ def test_ref_repr(): ], ) def test_html_to_vdom(case): - assert html_to_vdom(case["source"]) == { - "tagName": "div", - "children": [case["model"]], - } + assert html_to_vdom(case["source"]) == case["model"] def test_html_to_vdom_transform(): - source = "

hello world and universe

" + source = "

hello world and universelmao

" def make_links_blue(node): if node["tagName"] == "a": - node["attributes"]["style"] = {"color": "blue"} + node["attributes"] = {"style": {"color": "blue"}} return node expected = { @@ -89,10 +86,66 @@ def make_links_blue(node): "children": ["universe"], "attributes": {"style": {"color": "blue"}}, }, + "lmao", + ], + } + + assert html_to_vdom(source, make_links_blue) == expected + + +def test_non_html_tag_behavior(): + source = "" + + expected = { + "tagName": "my-tag", + "attributes": {"data-x": "something"}, + "children": [ + {"tagName": "my-other-tag", "key": "a-key"}, ], } - assert html_to_vdom(source, make_links_blue) == { - "tagName": "div", - "children": [expected], + assert html_to_vdom(source, strict=False) == expected + + with pytest.raises(HTMLParseError): + html_to_vdom(source, strict=True) + + +def test_html_to_vdom_with_null_tag(): + source = "

hello
world

" + + expected = { + "tagName": "p", + "children": [ + "hello", + {"tagName": "br"}, + "world", + ], } + + assert html_to_vdom(source) == expected + + +def test_html_to_vdom_with_style_attr(): + source = '

Hello World.

' + + expected = { + "attributes": {"style": {"backgroundColor": "green", "color": "red"}}, + "children": ["Hello World."], + "tagName": "p", + } + + assert html_to_vdom(source) == expected + + +def test_html_to_vdom_with_no_parent_node(): + source = "

Hello

World
" + + expected = { + "tagName": "", + "children": [ + {"tagName": "p", "children": ["Hello"]}, + {"tagName": "div", "children": ["World"]}, + ], + } + + assert html_to_vdom(source) == expected