diff --git a/docs/source/about/changelog.rst b/docs/source/about/changelog.rst
index a83b0cc29..8a8c5f614 100644
--- a/docs/source/about/changelog.rst
+++ b/docs/source/about/changelog.rst
@@ -25,6 +25,7 @@ Unreleased
**Fixed**
+- :issue:`777` - Fix edge cases where ``html_to_vdom`` can fail to convert HTML
- :issue:`789` - Conditionally rendered components cannot use contexts
- :issue:`773` - Use strict equality check for text, numeric, and binary types in hooks
- :issue:`801` - Accidental mutation of old model causes invalid JSON Patch
@@ -38,6 +39,7 @@ Unreleased
**Added**
- :pull:`123` - ``asgiref`` as a dependency
+- :pull:`795` - ``lxml`` as a dependency
v0.39.0
diff --git a/requirements/pkg-deps.txt b/requirements/pkg-deps.txt
index f13b33bf9..5e4835f12 100644
--- a/requirements/pkg-deps.txt
+++ b/requirements/pkg-deps.txt
@@ -6,3 +6,4 @@ fastjsonschema >=2.14.5
requests >=2
colorlog >=6
asgiref >=3
+lxml >= 4
diff --git a/src/idom/backend/utils.py b/src/idom/backend/utils.py
index b891ec793..35e4e75dd 100644
--- a/src/idom/backend/utils.py
+++ b/src/idom/backend/utils.py
@@ -35,7 +35,7 @@ def run(
implementation: BackendImplementation[Any] | None = None,
) -> None:
"""Run a component with a development server"""
- logger.warn(
+ logger.warning(
"You are running a development server. "
"Change this before deploying in production!"
)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index e8f9cfd01..ec114b2c3 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -1,8 +1,15 @@
-from html.parser import HTMLParser as _HTMLParser
-from typing import Any, Callable, Dict, Generic, List, Optional, Tuple, TypeVar
+from itertools import chain
+from typing import Any, Callable, Generic, Iterable, List, TypeVar, Union
+
+from lxml import etree
+from lxml.html import fragments_fromstring
+
+import idom
+from idom.core.types import VdomDict
_RefValue = TypeVar("_RefValue")
+_ModelTransform = Callable[[VdomDict], Any]
_UNDEFINED: Any = object()
@@ -49,11 +56,9 @@ def __repr__(self) -> str:
return f"{type(self).__name__}({current})"
-_ModelTransform = Callable[[Dict[str, Any]], Any]
-
-
-def html_to_vdom(source: str, *transforms: _ModelTransform) -> Dict[str, Any]:
- """Transform HTML into a DOM model
+def html_to_vdom(html: str, *transforms: _ModelTransform, strict: bool = True) -> VdomDict:
+ """Transform HTML into a DOM model. Unique keys can be provided to HTML elements
+ using a ``key=...`` attribute within your HTML tag.
Parameters:
source:
@@ -62,81 +67,154 @@ def html_to_vdom(source: str, *transforms: _ModelTransform) -> Dict[str, Any]:
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ```` block.
+ strict:
+ If ``True``, raise an exception if the HTML does not perfectly follow HTML5
+ syntax.
"""
- parser = HtmlParser()
- parser.feed(source)
- root = parser.model()
- to_visit = [root]
- while to_visit:
- node = to_visit.pop(0)
- if isinstance(node, dict) and "children" in node:
- transformed = []
- for child in node["children"]:
- if isinstance(child, dict):
- for t in transforms:
- child = t(child)
- if child is not None:
- transformed.append(child)
- to_visit.append(child)
- node["children"] = transformed
- if "attributes" in node and not node["attributes"]:
- del node["attributes"]
- if "children" in node and not node["children"]:
- del node["children"]
- return root
-
-
-class HtmlParser(_HTMLParser):
- """HTML to VDOM parser
-
- Example:
-
- .. code-block::
-
- parser = HtmlParser()
-
- parser.feed(an_html_string)
- parser.feed(another_html_string)
- ...
-
- vdom = parser.model()
+ if not isinstance(html, str): # pragma: no cover
+ raise TypeError(f"Expected html to be a string, not {type(html).__name__}")
+
+ # If the user provided a string, convert it to a list of lxml.etree nodes
+ parser = etree.HTMLParser(
+ remove_comments=True,
+ remove_pis=True,
+ remove_blank_text=True,
+ recover=not strict,
+ )
+ try:
+ nodes: List = fragments_fromstring(html, no_leading_text=True, parser=parser)
+ except etree.XMLSyntaxError as e:
+ if not strict:
+ raise e # pragma: no cover
+ raise HTMLParseError(
+ "An error has occurred while parsing the HTML.\n\n"
+ "This HTML may be malformatted, or may not perfectly adhere to HTML5.\n"
+ "If you believe the exception above was due to something intentional, "
+ "you can disable the strict parameter on html_to_vdom().\n"
+ "Otherwise, repair your broken HTML and try again."
+ ) from e
+ has_root_node = len(nodes) == 1
+
+ # Find or create a root node
+ if has_root_node:
+ root_node = nodes[0]
+ else:
+ # etree.Element requires a non-empty tag - we correct this below
+ root_node = etree.Element("TEMP", None, None)
+ for child in nodes:
+ root_node.append(child)
+
+ # Convert the lxml node to a VDOM dict
+ vdom = _etree_to_vdom(root_node, transforms)
+
+ # Change the artificially created root node to a React Fragment, instead of a div
+ if not has_root_node:
+ vdom["tagName"] = ""
+
+ return vdom
+
+
+def _etree_to_vdom(
+ node: etree._Element, transforms: Iterable[_ModelTransform]
+) -> VdomDict:
+ """Recursively transform an lxml etree node into a DOM model
+
+ Parameters:
+ node:
+ The ``lxml.etree._Element`` node
+ transforms:
+ Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
+ dictionary which will be replaced by ``new``. For example, you could use a
+ transform function to add highlighting to a ``<code/>`` block.
"""
+ if not isinstance(node, etree._Element): # pragma: no cover
+ raise TypeError(
+ f"Expected node to be a etree._Element, not {type(node).__name__}"
+ )
- def model(self) -> Dict[str, Any]:
- """Get the current state of parsed VDOM model"""
- return self._node_stack[0]
-
- def feed(self, data: str) -> None:
- """Feed in HTML that will update the :meth:`HtmlParser.model`"""
- self._node_stack.append(self._make_vdom("div", {}))
- super().feed(data)
-
- def reset(self) -> None:
- """Reset the state of the parser"""
- self._node_stack: List[Dict[str, Any]] = []
- super().reset()
-
- def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
- new = self._make_vdom(tag, dict(attrs))
- current = self._node_stack[-1]
- current["children"].append(new)
- self._node_stack.append(new)
-
- def handle_endtag(self, tag: str) -> None:
- del self._node_stack[-1]
-
- def handle_data(self, data: str) -> None:
- self._node_stack[-1]["children"].append(data)
-
- @staticmethod
- def _make_vdom(tag: str, attrs: Dict[str, Any]) -> Dict[str, Any]:
- if "style" in attrs:
- style = attrs["style"]
- if isinstance(style, str):
- style_dict = {}
- for k, v in (part.split(":", 1) for part in style.split(";") if part):
- title_case_key = k.title().replace("-", "")
- camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
- style_dict[camel_case_key] = v
- attrs["style"] = style_dict
- return {"tagName": tag, "attributes": attrs, "children": []}
+ # This will recursively call _etree_to_vdom() on all children
+ children = _generate_vdom_children(node, transforms)
+
+ # Convert the lxml node to a VDOM dict
+ attributes = dict(node.items())
+ key = attributes.pop("key", None)
+
+ if hasattr(idom.html, node.tag):
+ vdom = getattr(idom.html, node.tag)(attributes, *children, key=key)
+ else:
+ vdom: VdomDict = {"tagName": node.tag}
+ if children:
+ vdom["children"] = children
+ if attributes:
+ vdom["attributes"] = attributes
+ if key is not None:
+ vdom["key"] = key
+
+ # Perform any necessary mutations on the VDOM attributes to meet VDOM spec
+ _mutate_vdom(vdom)
+
+ # Apply any provided transforms.
+ for transform in transforms:
+ vdom = transform(vdom)
+
+ return vdom
+
+
+def _mutate_vdom(vdom: VdomDict):
+ """Performs any necessary mutations on the VDOM attributes to meet VDOM spec.
+
+ Currently, this function only transforms the ``style`` attribute into a dictionary whose keys are
+ camelCase so as to be renderable by React.
+
+ This function may be extended in the future.
+ """
+ # Determine if the style attribute needs to be converted to a dict
+ if (
+ "attributes" in vdom
+ and "style" in vdom["attributes"]
+ and isinstance(vdom["attributes"]["style"], str)
+ ):
+ # Convince type checker that it's safe to mutate attributes
+ assert isinstance(vdom["attributes"], dict)
+
+ # Convert style attribute from str -> dict with camelCase keys
+ vdom["attributes"]["style"] = {
+ _hypen_to_camel_case(key.strip()): value.strip()
+ for key, value in (
+ part.split(":", 1)
+ for part in vdom["attributes"]["style"].split(";")
+ if ":" in part
+ )
+ }
+
+
+def _generate_vdom_children(
+ node: etree._Element, transforms: Iterable[_ModelTransform]
+) -> List[Union[VdomDict, str]]:
+ """Generates a list of VDOM children from an lxml node.
+
+ Inserts inner text and/or tail text in between VDOM children, if necessary.
+ """
+ return ( # Get the inner text of the current node
+ [node.text] if node.text else []
+ ) + list(
+ chain(
+ *(
+ # Recursively convert each child node to VDOM
+ [_etree_to_vdom(child, transforms)]
+ # Insert the tail text between each child node
+ + ([child.tail] if child.tail else [])
+ for child in node.iterchildren(None)
+ )
+ )
+ )
+
+
+def _hypen_to_camel_case(string: str) -> str:
+ """Convert a hyphenated string to camelCase."""
+ first, _, remainder = string.partition("-")
+ return first.lower() + remainder.title().replace("-", "")
+
+
+class HTMLParseError(etree.LxmlSyntaxError):
+ """Raised when an HTML document cannot be parsed using strict parsing."""
diff --git a/src/idom/widgets.py b/src/idom/widgets.py
index a089b9d21..b66e89348 100644
--- a/src/idom/widgets.py
+++ b/src/idom/widgets.py
@@ -80,7 +80,7 @@ def use_linked_inputs(
value, set_value = idom.hooks.use_state(initial_value)
def sync_inputs(event: Dict[str, Any]) -> None:
- new_value = event["value"]
+ new_value = event["target"]["value"]
set_value(new_value)
if not new_value and ignore_empty:
return None
diff --git a/tests/test_utils.py b/tests/test_utils.py
index cca97a0ac..861fc315d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,7 @@
import pytest
import idom
-from idom.utils import html_to_vdom
+from idom.utils import HTMLParseError, html_to_vdom
def test_basic_ref_behavior():
@@ -60,18 +60,15 @@ def test_ref_repr():
],
)
def test_html_to_vdom(case):
- assert html_to_vdom(case["source"]) == {
- "tagName": "div",
- "children": [case["model"]],
- }
+ assert html_to_vdom(case["source"]) == case["model"]
def test_html_to_vdom_transform():
- source = "
hello
world
Hello World.
' + + expected = { + "attributes": {"style": {"backgroundColor": "green", "color": "red"}}, + "children": ["Hello World."], + "tagName": "p", + } + + assert html_to_vdom(source) == expected + + +def test_html_to_vdom_with_no_parent_node(): + source = "Hello