Skip to content

Commit

Permalink
Account for Etree Elements in HTML Stash
Browse files Browse the repository at this point in the history
By calling str on all stash elements we ensure they don't raise an error.
Worst case, something like `<Element 'div' at 0x000001B2DAE94900>` gets
inserted into the output. However, with the override in the md_in_html
extension, we actually serialize and reinsert the original HTML. Worst case,
an HTML block which should be parsed as Markdown gets skipped by the
extension (`<div markdown="block"></div>` gets inserted into the output).

The tricky part is testing as there should be no known cases where this
ever occurs. Therefore, we forcefully pass an etree Element directly to
the method in the test. That said, as Python-Markdown#1040 is unresolved at this point,
I have tested locally with a real existing case and it works well.

Related to Python-Markdown#1040.
  • Loading branch information
waylan committed Oct 14, 2020
1 parent b4a399c commit 7b099ad
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
12 changes: 12 additions & 0 deletions markdown/extensions/md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from . import Extension
from ..blockprocessors import BlockProcessor
from ..preprocessors import Preprocessor
from ..postprocessors import RawHtmlPostprocessor
from .. import util
from ..htmlparser import HTMLExtractor
import xml.etree.ElementTree as etree
Expand Down Expand Up @@ -263,6 +264,15 @@ def run(self, parent, blocks):
return False


class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
    """ Raw HTML postprocessor which can also restore etree elements from the stash. """

    def stash_to_string(self, text):
        """
        Return the string form of a stashed item.

        An `etree.Element` is passed through `self.md.serializer`; anything
        else is converted with `str()`.
        """
        if not isinstance(text, etree.Element):
            return str(text)
        # An element is still in the stash: serialize it rather than
        # stringifying the element object itself.
        return self.md.serializer(text)


class MarkdownInHtmlExtension(Extension):
"""Add Markdown parsing in HTML to Markdown class."""

Expand All @@ -275,6 +285,8 @@ def extendMarkdown(self, md):
md.parser.blockprocessors.register(
MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
)
# Replace raw HTML postprocessor
md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)


def makeExtension(**kwargs): # pragma: no cover
Expand Down
6 changes: 5 additions & 1 deletion markdown/postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def run(self, text):
""" Iterate over html stash and restore html. """
replacements = OrderedDict()
for i in range(self.md.htmlStash.html_counter):
html = self.md.htmlStash.rawHtmlBlocks[i]
html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
if self.isblocklevel(html):
replacements["<p>{}</p>".format(
self.md.htmlStash.get_placeholder(i))] = html
Expand All @@ -95,6 +95,10 @@ def isblocklevel(self, html):
return self.md.is_block_level(m.group(1))
return False

def stash_to_string(self, text):
    """
    Return the string form of an object taken from the HTML stash.

    The stashed object is converted with `str()`; a subclass may override
    this method to treat particular object types differently.
    """
    as_string = str(text)
    return as_string


class AndSubstitutePostprocessor(Postprocessor):
""" Restore valid entities """
Expand Down
17 changes: 16 additions & 1 deletion tests/test_syntax/extensions/test_md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@
from unittest import TestSuite
from markdown.test_tools import TestCase
from ..blocks.test_html_blocks import TestHTMLBlocks
from markdown import Markdown
from xml.etree.ElementTree import Element


class TestMarkdownInHTMLPostProcessor(TestCase):
    """ Check that an etree element left in the HTML stash is serialized to HTML. """

    def test_stash_to_string(self):
        # No known input leaves an etree Element in the stash, so one is handed
        # straight to the postprocessor method to exercise that code path.
        div = Element('div')
        div.text = 'Foo bar.'
        postprocessor = Markdown(extensions=['md_in_html']).postprocessors['raw_html']
        self.assertEqual(postprocessor.stash_to_string(div), '<div>Foo bar.</div>')


class TestDefaultwMdInHTML(TestHTMLBlocks):
Expand Down Expand Up @@ -758,7 +773,7 @@ def test_md1_nested_footnote_ref(self):
def load_tests(loader, tests, pattern):
''' Ensure TestHTMLBlocks doesn't get run twice by excluding it here.

Build and return a TestSuite made only of the test classes listed in the
loop below. The incoming `tests` argument is not used as input (the name is
rebound inside the loop) and `pattern` is not referenced.
'''
suite = TestSuite()
# NOTE(review): the two consecutive `for` lines below appear to be the removed
# and added sides of one diff line; only the second one includes
# TestMarkdownInHTMLPostProcessor -- confirm against the repository.
for test_class in [TestDefaultwMdInHTML, TestMdInHTML]:
for test_class in [TestDefaultwMdInHTML, TestMdInHTML, TestMarkdownInHTMLPostProcessor]:
# Load every test method of the class and add them to the suite.
tests = loader.loadTestsFromTestCase(test_class)
suite.addTests(tests)
return suite

0 comments on commit 7b099ad

Please sign in to comment.