diff --git a/enex2notion/note_parser/webclip.py b/enex2notion/note_parser/webclip.py index dca47bd..f542a41 100644 --- a/enex2notion/note_parser/webclip.py +++ b/enex2notion/note_parser/webclip.py @@ -4,6 +4,7 @@ from enex2notion.note_parser.blocks import parse_note_blocks from enex2notion.note_parser.webclip_stages_cleanup import ( + fix_redundant_img_src_quotes, remove_empty_blocks, strip_paragraphs, wrap_orphans, @@ -74,6 +75,7 @@ def parse_webclip(note_dom: Tag): remove_empty_blocks, wrap_orphans, strip_paragraphs, + fix_redundant_img_src_quotes, ) for processor in processors: diff --git a/enex2notion/note_parser/webclip_stages_cleanup.py b/enex2notion/note_parser/webclip_stages_cleanup.py index c638e03..48fc563 100644 --- a/enex2notion/note_parser/webclip_stages_cleanup.py +++ b/enex2notion/note_parser/webclip_stages_cleanup.py @@ -22,6 +22,12 @@ def wrap_orphans(root: Tag): _convert_to_paragraph(element) +def fix_redundant_img_src_quotes(root: Tag): + for e in root.find_all("img"): + if e.get("src"): + e["src"] = e["src"].strip("\"'") + + def _convert_to_paragraph(element): div = Tag(name="div") diff --git a/tests/test_webclip_parser.py b/tests/test_webclip_parser.py index b232427..c854427 100644 --- a/tests/test_webclip_parser.py +++ b/tests/test_webclip_parser.py @@ -1,7 +1,11 @@ +import base64 + +from enex2notion.enex_types import EvernoteResource from enex2notion.note_parser.webclip import parse_webclip from enex2notion.notion_blocks.header import NotionSubsubheaderBlock from enex2notion.notion_blocks.list import NotionBulletedListBlock from enex2notion.notion_blocks.text import NotionTextBlock, TextProp +from enex2notion.notion_blocks.uploadable import NotionImageBlock def test_empty(parse_html): @@ -208,3 +212,23 @@ def test_flatten_bad_inline(parse_html): assert parse_webclip(test_note) == [ NotionTextBlock(text_prop=TextProp("test")), ] + + +def test_embedded_inline_img_bin_bad_quotes(parse_html, smallest_gif): + test_note = parse_html( + f"' + ) + + result_block = parse_webclip(test_note)[0] + + assert result_block == NotionImageBlock( + md5_hash=smallest_gif.md5, + resource=EvernoteResource( + data_bin=smallest_gif.data_bin, + size=smallest_gif.size, + md5=smallest_gif.md5, + mime=smallest_gif.mime, + file_name=f"{smallest_gif.md5}.gif", + ), + )