Skip to content

Commit

Permalink
Properly parse inline HTML in md_in_html
Browse files Browse the repository at this point in the history
Fixes #1040 and fixes #1045.
  • Loading branch information
facelessuser authored Oct 19, 2020
1 parent 607a091 commit 2766698
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 5 deletions.
36 changes: 31 additions & 5 deletions markdown/extensions/md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,22 @@ def get_state(self, tag, attrs):
else: # pragma: no cover
return None

def at_line_start(self):
    """Report whether the parser is positioned at the start of a line.

    The parent implementation is consulted first. When it gives a falsy
    answer, the position still counts as a line start if the most recent
    entry in ``cleandoc`` ends with a newline.
    """
    at_start = super().at_line_start()
    if at_start:
        return at_start
    # Fall back to what has already been collected in ``cleandoc``.
    if self.cleandoc and self.cleandoc[-1].endswith('\n'):
        return True
    return at_start

def handle_starttag(self, tag, attrs):
if tag in block_level_tags:
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
attrs = {key: value if value is not None else key for key, value in attrs}
state = self.get_state(tag, attrs)

if self.inraw or (state in [None, 'off'] and not self.mdstack):
if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
# fall back to default behavior
attrs.pop('markdown', None)
super().handle_starttag(tag, attrs)
Expand All @@ -111,7 +119,10 @@ def handle_starttag(self, tag, attrs):
super().handle_starttag(tag, attrs)
else:
text = self.get_starttag_text()
self.handle_data(text)
if self.mdstate and self.mdstate[-1] == "off":
self.handle_data(self.md.htmlStash.store(text))
else:
self.handle_data(text)

def handle_endtag(self, tag):
if tag in block_level_tags:
Expand All @@ -128,20 +139,32 @@ def handle_endtag(self, tag):
if not self.mdstack:
# Last item in stack is closed. Stash it
element = self.get_element()
# Get the last entry to see if it ends in newlines.
# If it is an element, assume there are no newlines.
item = self.cleandoc[-1] if self.cleandoc else ''
# If we only have one newline before block element, add another
if not item.endswith('\n\n') and item.endswith('\n'):
self.cleandoc.append('\n')
self.cleandoc.append(self.md.htmlStash.store(element))
self.cleandoc.append('\n\n')
self.state = []
else:
# Treat orphan closing tag as a span level tag.
text = self.get_endtag_text(tag)
self.handle_data(text)
if self.mdstate and self.mdstate[-1] == "off":
self.handle_data(self.md.htmlStash.store(text))
else:
self.handle_data(text)
else:
# Span level tag
if self.inraw:
super().handle_endtag(tag)
else:
text = self.get_endtag_text(tag)
self.handle_data(text)
if self.mdstate and self.mdstate[-1] == "off":
self.handle_data(self.md.htmlStash.store(text))
else:
self.handle_data(text)

def handle_data(self, data):
if self.inraw or not self.mdstack:
Expand All @@ -156,7 +179,10 @@ def handle_empty_tag(self, data, is_block):
if self.at_line_start() and is_block:
self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n')
else:
self.handle_data(data)
if self.mdstate and self.mdstate[-1] == "off":
self.handle_data(self.md.htmlStash.store(data))
else:
self.handle_data(data)


class HtmlBlockPreprocessor(Preprocessor):
Expand Down
160 changes: 160 additions & 0 deletions tests/test_syntax/extensions/test_md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,166 @@ def test_md1_nested_empty_block(self):
)
)

def test_orphan_end_tag_in_raw_html(self):
# An orphan `</pre>` end tag inside a plain `<div>` that is nested in a
# `markdown="1"` div is expected to be emitted unchanged; only the
# `markdown` attribute of the outer div disappears from the output.
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
<div>
Test
</pre>
Test
</div>
</div>
"""
),
self.dedent(
"""
<div>
<div>
Test
</pre>
Test
</div>
</div>
"""
)
)

def test_complex_nested_case(self):
# Inside a `markdown="1"` div, the leading `**test**` is expected to become
# a `<p><strong>` paragraph, while everything inside the nested plain
# `<div>` (bold markers, `<img>`, `<code>`, `<span>`, `<p>`) is expected
# to be passed through verbatim.
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
**test**
<div>
**test**
<img src=""/>
<code>Test</code>
<span>**test**</span>
<p>Test 2</p>
</div>
</div>
"""
),
self.dedent(
"""
<div>
<p><strong>test</strong></p>
<div>
**test**
<img src=""/>
<code>Test</code>
<span>**test**</span>
<p>Test 2</p>
</div>
</div>
"""
)
)

def test_complex_nested_case_whitespace(self):
# Variant of the nested case with trailing tabs (`\t`) after the preceding
# text line and the opening `markdown="1"` tag, plus a tab-only line.
# The nested plain `<div>` content is expected verbatim, and the
# `**test**` that follows it is expected to render as a paragraph.
self.assertMarkdownRenders(
self.dedent(
"""
Text with space\t
<div markdown="1">\t
\t
<div>
**test**
<img src=""/>
<code>Test</code>
<span>**test**</span>
<div>With whitespace</div>
<p>Test 2</p>
</div>
**test**
</div>
"""
),
self.dedent(
"""
<p>Text with space </p>
<div>
<div>
**test**
<img src=""/>
<code>Test</code>
<span>**test**</span>
<div>With whitespace</div>
<p>Test 2</p>
</div>
<p><strong>test</strong></p>
</div>
"""
)
)

def test_md1_intail_md1(self):
# Two `markdown="1"` divs written back to back on a single line are each
# expected to render as their own block with Markdown-processed content.
self.assertMarkdownRenders(
'<div markdown="1">*foo*</div><div markdown="1">*bar*</div>',
self.dedent(
"""
<div>
<p><em>foo</em></p>
</div>
<div>
<p><em>bar</em></p>
</div>
"""
)
)

def test_md1_no_blank_line_before(self):
# A `markdown="1"` div that starts on the line directly after a paragraph
# (no blank line in between) is still expected to be handled as a block:
# the paragraph closes and the div content is Markdown-processed.
self.assertMarkdownRenders(
self.dedent(
"""
A _Markdown_ paragraph with no blank line after.
<div markdown="1">
A _Markdown_ paragraph in an HTML block with no blank line before.
</div>
"""
),
self.dedent(
"""
<p>A <em>Markdown</em> paragraph with no blank line after.</p>
<div>
<p>A <em>Markdown</em> paragraph in an HTML block with no blank line before.</p>
</div>
"""
)
)

def test_md1_no_line_break(self):
# The div here is parsed as a span-level element. Bad input equals bad output!
# The expected output keeps the `markdown="1"` attribute and leaves the
# div inside the surrounding `<p>`, with inline Markdown still applied.
self.assertMarkdownRenders(
'A _Markdown_ paragraph with <div markdown="1">no _line break_.</div>',
'<p>A <em>Markdown</em> paragraph with <div markdown="1">no <em>line break</em>.</div></p>'
)

def test_md1_in_tail(self):
# A `markdown="1"` div that opens on the same line immediately after a
# closed `<div></div>` is expected to be rendered as a separate block
# whose content is Markdown-processed.
self.assertMarkdownRenders(
self.dedent(
"""
<div></div><div markdown="1">
A _Markdown_ paragraph in an HTML block in tail of previous element.
</div>
"""
),
self.dedent(
"""
<div></div>
<div>
<p>A <em>Markdown</em> paragraph in an HTML block in tail of previous element.</p>
</div>
"""
)
)

def test_md_span_paragraph(self):
self.assertMarkdownRenders(
'<p markdown="span">*foo*</p>',
Expand Down

0 comments on commit 2766698

Please sign in to comment.