From 2766698ac88ae9218d41b3ca1d9fbd4b4bd105e5 Mon Sep 17 00:00:00 2001
From: Isaac Muse
Date: Mon, 19 Oct 2020 12:07:45 -0600
Subject: [PATCH] Properly parse inline HTML in md_in_html
Fixes #1040 and fixes #1045.
---
markdown/extensions/md_in_html.py | 36 +++-
.../test_syntax/extensions/test_md_in_html.py | 160 ++++++++++++++++++
2 files changed, 191 insertions(+), 5 deletions(-)
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index 174224ab9..f63556398 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -86,6 +86,14 @@ def get_state(self, tag, attrs):
else: # pragma: no cover
return None
+ def at_line_start(self):
+ """Return a truthy value if the base parser reports a line start or the last `cleandoc` entry ends with a newline."""
+
+ value = super().at_line_start()
+ if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
+ value = True
+ return value
+
def handle_starttag(self, tag, attrs):
if tag in block_level_tags:
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
@@ -93,7 +101,7 @@ def handle_starttag(self, tag, attrs):
attrs = {key: value if value is not None else key for key, value in attrs}
state = self.get_state(tag, attrs)
- if self.inraw or (state in [None, 'off'] and not self.mdstack):
+ if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
# fall back to default behavior
attrs.pop('markdown', None)
super().handle_starttag(tag, attrs)
@@ -111,7 +119,10 @@ def handle_starttag(self, tag, attrs):
super().handle_starttag(tag, attrs)
else:
text = self.get_starttag_text()
- self.handle_data(text)
+ if self.mdstate and self.mdstate[-1] == "off":
+ self.handle_data(self.md.htmlStash.store(text))
+ else:
+ self.handle_data(text)
def handle_endtag(self, tag):
if tag in block_level_tags:
@@ -128,20 +139,32 @@ def handle_endtag(self, tag):
if not self.mdstack:
# Last item in stack is closed. Stash it
element = self.get_element()
+ # Get last entry to see if it ends in newlines
+ # If it is an element, assume there are no newlines
+ item = self.cleandoc[-1] if self.cleandoc else ''
+ # If we only have one newline before block element, add another
+ if not item.endswith('\n\n') and item.endswith('\n'):
+ self.cleandoc.append('\n')
self.cleandoc.append(self.md.htmlStash.store(element))
self.cleandoc.append('\n\n')
self.state = []
else:
# Treat orphan closing tag as a span level tag.
text = self.get_endtag_text(tag)
- self.handle_data(text)
+ if self.mdstate and self.mdstate[-1] == "off":
+ self.handle_data(self.md.htmlStash.store(text))
+ else:
+ self.handle_data(text)
else:
# Span level tag
if self.inraw:
super().handle_endtag(tag)
else:
text = self.get_endtag_text(tag)
- self.handle_data(text)
+ if self.mdstate and self.mdstate[-1] == "off":
+ self.handle_data(self.md.htmlStash.store(text))
+ else:
+ self.handle_data(text)
def handle_data(self, data):
if self.inraw or not self.mdstack:
@@ -156,7 +179,10 @@ def handle_empty_tag(self, data, is_block):
if self.at_line_start() and is_block:
self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n')
else:
- self.handle_data(data)
+ if self.mdstate and self.mdstate[-1] == "off":
+ self.handle_data(self.md.htmlStash.store(data))
+ else:
+ self.handle_data(data)
class HtmlBlockPreprocessor(Preprocessor):
diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py
index 433cdd559..946e9225a 100644
--- a/tests/test_syntax/extensions/test_md_in_html.py
+++ b/tests/test_syntax/extensions/test_md_in_html.py
@@ -390,6 +390,166 @@ def test_md1_nested_empty_block(self):
)
)
+ def test_orphan_end_tag_in_raw_html(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+
+
+ Test
+
+
+
+ Test
+
+
+ """
+ ),
+ self.dedent(
+ """
+
+
+ Test
+
+
+
+ Test
+
+
+ """
+ )
+ )
+
+ def test_complex_nested_case(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+
+ **test**
+
+ **test**
+
+
Test
+
**test**
+
Test 2
+
+
+ """
+ ),
+ self.dedent(
+ """
+
+
test
+
+ **test**
+
+
Test
+
**test**
+
Test 2
+
+
+ """
+ )
+ )
+
+ def test_complex_nested_case_whitespace(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ Text with space\t
+ \t
+ \t
+
+ **test**
+
+
Test
+
**test**
+
With whitespace
+
Test 2
+
+ **test**
+
+ """
+ ),
+ self.dedent(
+ """
+ Text with space
+
+
+ **test**
+
+
Test
+
**test**
+
With whitespace
+
Test 2
+
+
test
+
+ """
+ )
+ )
+
+ def test_md1_intail_md1(self):
+ self.assertMarkdownRenders(
+ '*foo*
*bar*
',
+ self.dedent(
+ """
+
+
+ """
+ )
+ )
+
+ def test_md1_no_blank_line_before(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ A _Markdown_ paragraph with no blank line after.
+
+ A _Markdown_ paragraph in an HTML block with no blank line before.
+
+ """
+ ),
+ self.dedent(
+ """
+ A Markdown paragraph with no blank line after.
+
+
A Markdown paragraph in an HTML block with no blank line before.
+
+ """
+ )
+ )
+
+ def test_md1_no_line_break(self):
+ # The div here is parsed as a span-level element. Bad input equals bad output!
+ self.assertMarkdownRenders(
+ 'A _Markdown_ paragraph with no _line break_.
',
+ 'A Markdown paragraph with
no line break.
'
+ )
+
+ def test_md1_in_tail(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+
+ A _Markdown_ paragraph in an HTML block in tail of previous element.
+
+ """
+ ),
+ self.dedent(
+ """
+
+
+
A Markdown paragraph in an HTML block in tail of previous element.
+
+ """
+ )
+ )
+
def test_md_span_paragraph(self):
self.assertMarkdownRenders(
'*foo*
',