Skip to content

Commit

Permalink
fix: parse unexpected elements in lists as text
Browse files Browse the repository at this point in the history
  • Loading branch information
vzhd1701 committed Dec 6, 2021
1 parent 3d1f045 commit 952feb2
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
21 changes: 19 additions & 2 deletions enex2notion/note_parser_elements.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging

from bs4 import Tag
from bs4 import NavigableString, Tag

from enex2notion.notion_blocks import NotionTextBlock, TextProp
from enex2notion.notion_blocks_header import (
NotionHeaderBlock,
NotionSubheaderBlock,
Expand Down Expand Up @@ -36,11 +37,27 @@ def parse_list(element: Tag):
nodes[-1].children.extend(parse_list(subelement))

else:
logger.warning(f"Unexpected tag inside list: {subelement.name}")
li_odd_item = _parse_odd_item(subelement)
if li_odd_item is None:
continue

nodes.append(li_odd_item)

return nodes


def _parse_odd_item(element: Tag):
    """Convert an unexpected child of a list into a plain text block.

    Whitespace-only bare strings yield ``None``. Any other bare string, and
    any tag, is reported with a warning and wrapped in a ``NotionTextBlock``.
    """
    if not isinstance(element, NavigableString):
        # Not a bare string: report the tag name and run the element
        # through extract_string to obtain its text property.
        logger.warning(f"Unexpected tag inside list: {element.name}, parsing as text")
        return NotionTextBlock(text_prop=extract_string(element))

    stripped_text = element.text.strip()
    if not stripped_text:
        # Nothing but whitespace between list items; nothing to emit.
        return None

    logger.warning("Non-empty string element inside list")
    return NotionTextBlock(text_prop=TextProp(text=stripped_text))


def _parse_list_item(list_item, is_ul):
li_text = extract_string(list_item)

Expand Down
26 changes: 26 additions & 0 deletions tests/test_note_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,32 @@ def test_list_ul_nested():
assert parse_note_dom(test_note) == expected


def test_list_ul_strings_inside(caplog):
    """A bare string directly inside ``<ul>`` is kept as a text block and logged."""
    note_dom = parse_html("<ul><li><div>test1</div></li>test2</ul>")

    with caplog.at_level(logging.WARNING):
        parsed_blocks = parse_note_dom(note_dom)

    first_message = caplog.records[0].message
    assert "Non-empty string element inside list" in first_message

    expected_blocks = [
        NotionBulletedListBlock(text_prop=TextProp("test1")),
        NotionTextBlock(text_prop=TextProp("test2")),
    ]
    assert parsed_blocks == expected_blocks


def test_list_ul_unexpected_inside(caplog):
    """A non-``li`` tag directly inside ``<ul>`` is kept as a text block and logged."""
    note_dom = parse_html("<ul><li><div>test1</div></li><span>test2</span></ul>")

    with caplog.at_level(logging.WARNING):
        parsed_blocks = parse_note_dom(note_dom)

    first_message = caplog.records[0].message
    assert "Unexpected tag inside list" in first_message

    expected_blocks = [
        NotionBulletedListBlock(text_prop=TextProp("test1")),
        NotionTextBlock(text_prop=TextProp("test2")),
    ]
    assert parsed_blocks == expected_blocks


def test_table():
test_note = parse_html(
"<table>"
Expand Down

0 comments on commit 952feb2

Please sign in to comment.