Skip to content

Commit

Permalink
fix: convert bad img src quotes when parsing webclip
Browse files Browse the repository at this point in the history
fix #79
  • Loading branch information
vzhd1701 committed Oct 19, 2023
1 parent f6f31bf commit 080d7e3
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 0 deletions.
2 changes: 2 additions & 0 deletions enex2notion/note_parser/webclip.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from enex2notion.note_parser.blocks import parse_note_blocks
from enex2notion.note_parser.webclip_stages_cleanup import (
fix_redundant_img_src_quotes,
remove_empty_blocks,
strip_paragraphs,
wrap_orphans,
Expand Down Expand Up @@ -74,6 +75,7 @@ def parse_webclip(note_dom: Tag):
remove_empty_blocks,
wrap_orphans,
strip_paragraphs,
fix_redundant_img_src_quotes,
)

for processor in processors:
Expand Down
6 changes: 6 additions & 0 deletions enex2notion/note_parser/webclip_stages_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ def wrap_orphans(root: Tag):
_convert_to_paragraph(element)


def fix_redundant_img_src_quotes(root: Tag):
    """Strip stray quotes wrapped around the ``src`` of every ``<img>``.

    Some web clips carry markup such as ``<img src="'data:...'">``, where
    the attribute value itself starts and ends with quote characters.
    Those quotes are not part of the URL and are removed in place.

    Leading and trailing ASCII whitespace is removed together with the
    quotes, so a padded value such as ``" 'url' "`` is cleaned up too;
    stripping quotes alone would stop at the outer space and leave them.

    Args:
        root: Parsed note tree; its ``<img>`` descendants are modified
            in place.

    Returns:
        None.
    """
    # Quote characters plus the HTML "ASCII whitespace" set.
    junk_chars = "\"' \t\n\r\f"

    for img in root.find_all("img"):
        src = img.get("src")
        if not src:
            # Missing or empty attribute: nothing to clean.
            continue
        img["src"] = src.strip(junk_chars)


def _convert_to_paragraph(element):
div = Tag(name="div")

Expand Down
24 changes: 24 additions & 0 deletions tests/test_webclip_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import base64

from enex2notion.enex_types import EvernoteResource
from enex2notion.note_parser.webclip import parse_webclip
from enex2notion.notion_blocks.header import NotionSubsubheaderBlock
from enex2notion.notion_blocks.list import NotionBulletedListBlock
from enex2notion.notion_blocks.text import NotionTextBlock, TextProp
from enex2notion.notion_blocks.uploadable import NotionImageBlock


def test_empty(parse_html):
Expand Down Expand Up @@ -208,3 +212,23 @@ def test_flatten_bad_inline(parse_html):
assert parse_webclip(test_note) == [
NotionTextBlock(text_prop=TextProp("test")),
]


def test_embedded_inline_img_bin_bad_quotes(parse_html, smallest_gif):
    """An inline image whose data URI is wrapped in extra single quotes
    inside the ``src`` attribute must still parse to an image block."""
    encoded_gif = base64.b64encode(smallest_gif.data_bin).decode("utf-8")
    # The attribute value itself begins and ends with a single quote.
    quoted_src = f"'data:{smallest_gif.mime};base64,{encoded_gif}'"
    test_note = parse_html(f'<img src="{quoted_src}" />')

    parsed_blocks = parse_webclip(test_note)
    first_block = parsed_blocks[0]

    expected_resource = EvernoteResource(
        data_bin=smallest_gif.data_bin,
        size=smallest_gif.size,
        md5=smallest_gif.md5,
        mime=smallest_gif.mime,
        file_name=f"{smallest_gif.md5}.gif",
    )
    expected_block = NotionImageBlock(
        md5_hash=smallest_gif.md5,
        resource=expected_resource,
    )

    assert first_block == expected_block

0 comments on commit 080d7e3

Please sign in to comment.