From 952feb2b6905ecd65e0c2ba491bef16cd2f24ea9 Mon Sep 17 00:00:00 2001
From: vzhd1701
Date: Mon, 6 Dec 2021 16:18:53 +0500
Subject: [PATCH] fix: parse unexpected elements in lists as text

---
NOTE(review): line structure restored from the hunk headers and diffstat.
The HTML fixtures passed to parse_html(...) in the added tests are empty in
this copy although the tests assert two parsed blocks; they were presumably
lost in transit -- confirm against the original commit before applying.

 enex2notion/note_parser_elements.py | 21 +++++++++++++++++++--
 tests/test_note_parser.py           | 26 ++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/enex2notion/note_parser_elements.py b/enex2notion/note_parser_elements.py
index bde216b..442c943 100644
--- a/enex2notion/note_parser_elements.py
+++ b/enex2notion/note_parser_elements.py
@@ -1,7 +1,8 @@
 import logging
 
-from bs4 import Tag
+from bs4 import NavigableString, Tag
 
+from enex2notion.notion_blocks import NotionTextBlock, TextProp
 from enex2notion.notion_blocks_header import (
     NotionHeaderBlock,
     NotionSubheaderBlock,
@@ -36,11 +37,27 @@ def parse_list(element: Tag):
             nodes[-1].children.extend(parse_list(subelement))
 
         else:
-            logger.warning(f"Unexpected tag inside list: {subelement.name}")
+            li_odd_item = _parse_odd_item(subelement)
+            if li_odd_item is None:
+                continue
+
+            nodes.append(li_odd_item)
 
     return nodes
 
 
+def _parse_odd_item(element: Tag):
+    if isinstance(element, NavigableString):
+        if not element.text.strip():
+            return None
+
+        logger.warning("Non-empty string element inside list")
+        return NotionTextBlock(text_prop=TextProp(text=element.text.strip()))
+
+    logger.warning(f"Unexpected tag inside list: {element.name}, parsing as text")
+    return NotionTextBlock(text_prop=extract_string(element))
+
+
 def _parse_list_item(list_item, is_ul):
     li_text = extract_string(list_item)
 
diff --git a/tests/test_note_parser.py b/tests/test_note_parser.py
index d2c50f2..11db5e2 100644
--- a/tests/test_note_parser.py
+++ b/tests/test_note_parser.py
@@ -119,6 +119,32 @@ def test_list_ul_nested():
     assert parse_note_dom(test_note) == expected
 
 
+def test_list_ul_strings_inside(caplog):
+    test_note = parse_html("")
+
+    with caplog.at_level(logging.WARNING):
+        result_blocks = parse_note_dom(test_note)
+
+    assert "Non-empty string element inside list" in caplog.records[0].message
+    assert result_blocks == [
+        NotionBulletedListBlock(text_prop=TextProp("test1")),
+        NotionTextBlock(text_prop=TextProp("test2")),
+    ]
+
+
+def test_list_ul_unexpected_inside(caplog):
+    test_note = parse_html("")
+
+    with caplog.at_level(logging.WARNING):
+        result_blocks = parse_note_dom(test_note)
+
+    assert "Unexpected tag inside list" in caplog.records[0].message
+    assert result_blocks == [
+        NotionBulletedListBlock(text_prop=TextProp("test1")),
+        NotionTextBlock(text_prop=TextProp("test2")),
+    ]
+
+
 def test_table():
     test_note = parse_html(
         ""