Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly parse code spans in md_in_html #1069

Merged
merged 6 commits into from
Nov 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/change_log/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ title: Change Log
Python-Markdown Change Log
=========================

Under development: version 3.3.4 (a bug-fix release).

* Properly parse code spans in md_in_html (#1069).

Oct 25, 2020: version 3.3.3 (a bug-fix release).

* Unify all block-level tags (#1047).
Expand Down
27 changes: 14 additions & 13 deletions markdown/extensions/md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from ..preprocessors import Preprocessor
from ..postprocessors import RawHtmlPostprocessor
from .. import util
from ..htmlparser import HTMLExtractor
from ..htmlparser import HTMLExtractor, blank_line_re
import xml.etree.ElementTree as etree


Expand Down Expand Up @@ -85,17 +85,9 @@ def get_state(self, tag, attrs):
else: # pragma: no cover
return None

def at_line_start(self):
"""At line start."""

value = super().at_line_start()
if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
value = True
return value

def handle_starttag(self, tag, attrs):
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags:
if tag in self.empty_tags and (self.at_line_start() or self.intail):
attrs = {key: value if value is not None else key for key, value in attrs}
if "markdown" in attrs:
attrs.pop('markdown')
Expand All @@ -106,13 +98,12 @@ def handle_starttag(self, tag, attrs):
self.handle_empty_tag(data, True)
return

if tag in self.block_level_tags:
if tag in self.block_level_tags and (self.at_line_start() or self.intail):
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
attrs = {key: value if value is not None else key for key, value in attrs}
state = self.get_state(tag, attrs)

if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
if self.inraw or (state in [None, 'off'] and not self.mdstack):
# fall back to default behavior
attrs.pop('markdown', None)
super().handle_starttag(tag, attrs)
Expand All @@ -134,6 +125,9 @@ def handle_starttag(self, tag, attrs):
self.handle_data(self.md.htmlStash.store(text))
else:
self.handle_data(text)
if tag in self.CDATA_CONTENT_ELEMENTS:
# This is presumably a standalone tag in a code span (see #1036).
self.clear_cdata_mode()

def handle_endtag(self, tag):
if tag in self.block_level_tags:
Expand All @@ -159,6 +153,11 @@ def handle_endtag(self, tag):
self.cleandoc.append(self.md.htmlStash.store(element))
self.cleandoc.append('\n\n')
self.state = []
# Check if element has a tail
if not blank_line_re.match(
self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
# More content exists after endtag.
self.intail = True
else:
# Treat orphan closing tag as a span level tag.
text = self.get_endtag_text(tag)
Expand Down Expand Up @@ -191,6 +190,8 @@ def handle_startendtag(self, tag, attrs):
self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))

def handle_data(self, data):
if self.intail and '\n' in data:
self.intail = False
if self.inraw or not self.mdstack:
super().handle_data(data)
else:
Expand Down
10 changes: 8 additions & 2 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,14 @@ def close(self):
@property
def line_offset(self):
"""Returns char index in self.rawdata for the start of the current line. """
if self.lineno > 1:
return re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata).end()
if self.lineno > 1 and '\n' in self.rawdata:
m = re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata)
if m:
return m.end()
else: # pragma: no cover
# Value of self.lineno must exceed total number of lines.
# Find index of beginning of last line.
return self.rawdata.rfind('\n')
return 0

def at_line_start(self):
Expand Down
66 changes: 66 additions & 0 deletions tests/test_syntax/extensions/test_md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,72 @@ def test_md1_div_linebreaks(self):
)
)

def test_md1_code_span(self):
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
`<h1>code span</h1>`
</div>
"""
),
self.dedent(
"""
<div>
<p><code>&lt;h1&gt;code span&lt;/h1&gt;</code></p>
</div>
"""
)
)

def test_md1_code_span_oneline(self):
self.assertMarkdownRenders(
'<div markdown="1">`<h1>code span</h1>`</div>',
self.dedent(
"""
<div>
<p><code>&lt;h1&gt;code span&lt;/h1&gt;</code></p>
</div>
"""
)
)

def test_md1_code_span_unclosed(self):
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
`<p>`
</div>
"""
),
self.dedent(
"""
<div>
<p><code>&lt;p&gt;</code></p>
</div>
"""
)
)

def test_md1_code_span_script_tag(self):
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
`<script>`
</div>
"""
),
self.dedent(
"""
<div>
<p><code>&lt;script&gt;</code></p>
</div>
"""
)
)

def test_md1_div_blank_lines(self):
self.assertMarkdownRenders(
self.dedent(
Expand Down