Skip to content

Commit

Permalink
Feature ancestry (#598)
Browse files Browse the repository at this point in the history
Ancestry exclusion for inline patterns.

Adds the ability for an inline pattern to define a list of ancestor tag names that should be avoided. If a pattern would create a descendant of one of the listed tag names, the pattern will not match. Fixes #596.
  • Loading branch information
facelessuser authored and waylan committed Nov 23, 2017
1 parent 007bd2a commit de5c696
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 13 deletions.
3 changes: 2 additions & 1 deletion .spell-dict
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ traceback
Tredinnick
Treeprocessor
Treeprocessors
tuple
tuples
unordered
untrusted
Expand All @@ -122,4 +123,4 @@ wiki
JavaScript
plugin
plugins
configs
configs
6 changes: 5 additions & 1 deletion docs/extensions/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ A pseudo example:
Inline Patterns {: #inlinepatterns }
------------------------------------

Inline Patterns implement the inline HTML element syntax for Markdown such as
Inline Patterns implement the inline HTML element syntax for Markdown such as
`*emphasis*` or `[links](http://example.com)`. Pattern objects should be
instances of classes that inherit from `markdown.inlinepatterns.Pattern` or
one of its children. Each pattern object uses a single regular expression and
Expand All @@ -68,6 +68,10 @@ must have the following methods:
Accepts a match object and returns an ElementTree element or a plain
Unicode string.

Also, Inline Patterns can define the property `ANCESTOR_EXCLUDES` with either
a list or tuple of undesirable ancestors. The pattern should not match if it
would cause the content to be a descendant of one of the defined tag names.

Note that any regular expression returned by `getCompiledRegExp` must capture
the whole block. Therefore, they should all start with `r'^(.*?)'` and end
with `r'(.*?)!'`. When using the default `getCompiledRegExp()` method
Expand Down
2 changes: 2 additions & 0 deletions markdown/inlinepatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def attributeCallback(match):
class Pattern(object):
"""Base class that inline patterns subclass. """

ANCESTOR_EXCLUDES = tuple()

def __init__(self, pattern, markdown_instance=None):
"""
Create an instant of an inline pattern.
Expand Down
57 changes: 46 additions & 11 deletions markdown/treeprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self, md):
self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
self.markdown = md
self.inlinePatterns = md.inlinePatterns
self.ancestors = []

def __makePlaceholder(self, type):
""" Generate a placeholder """
Expand Down Expand Up @@ -138,7 +139,7 @@ def __processElementText(self, node, subnode, isText=True):

childResult.reverse()
for newChild in childResult:
node.insert(pos, newChild)
node.insert(pos, newChild[0])

def __processPlaceholders(self, data, parent, isText=True):
"""
Expand All @@ -155,10 +156,10 @@ def __processPlaceholders(self, data, parent, isText=True):
def linkText(text):
if text:
if result:
if result[-1].tail:
result[-1].tail += text
if result[-1][0].tail:
result[-1][0].tail += text
else:
result[-1].tail = text
result[-1][0].tail = text
elif not isText:
if parent.tail:
parent.tail += text
Expand Down Expand Up @@ -199,7 +200,7 @@ def linkText(text):
continue

strartIndex = phEndIndex
result.append(node)
result.append((node, self.ancestors[:]))

else: # wrong placeholder
end = index + len(self.__placeholder_prefix)
Expand Down Expand Up @@ -230,6 +231,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
Returns: String with placeholders instead of ElementTree elements.
"""

for exclude in pattern.ANCESTOR_EXCLUDES:
if exclude.lower() in self.ancestors:
return data, False, 0

match = pattern.getCompiledRegExp().match(data[startIndex:])
leftData = data[:startIndex]

Expand All @@ -247,9 +253,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
for child in [node] + list(node):
if not isString(node):
if child.text:
self.ancestors.append(child.tag.lower())
child.text = self.__handleInline(
child.text, patternIndex + 1
)
self.ancestors.pop()
if child.tail:
child.tail = self.__handleInline(
child.tail, patternIndex
Expand All @@ -261,7 +269,17 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
match.group(1),
placeholder, match.groups()[-1]), True, 0

def run(self, tree):
def __build_ancestors(self, parent, parents):
    """Append the tag names leading down to ``parent`` onto ``parents``.

    Walks upward from ``parent`` through ``self.parent_map`` (a mapping of
    child element -> parent element) until an element with no recorded
    parent is reached, then extends ``parents`` in place with the
    lower-cased tag names ordered from the outermost ancestor down to
    ``parent`` itself.

    Arguments:

    * parent: Element to start from, or ``None`` (nothing is added).
    * parents: List that is extended in place with the tag names.

    Returns: None.
    """
    ancestors = []
    # Compare against None explicitly: the truth value of an ElementTree
    # element reflects whether it has children, so a childless element
    # would otherwise be treated as "no element" and end the walk early.
    while parent is not None:
        ancestors.append(parent.tag.lower())
        parent = self.parent_map.get(parent)
    # Collected innermost-first; callers expect outermost-first.
    ancestors.reverse()
    parents.extend(ancestors)

def run(self, tree, ancestors=None):
"""Apply inline patterns to a parsed Markdown tree.
Iterate over ElementTree, find elements with inline tag, apply inline
Expand All @@ -274,28 +292,42 @@ def run(self, tree):
Arguments:
* tree: ElementTree object, representing Markdown tree.
* ancestors: List of parent tag names that preceed the tree node (if needed).
Returns: ElementTree object with applied inline patterns.
"""
self.stashed_nodes = {}

stack = [tree]
# Ensure a valid parent list, but copy passed in lists
# to ensure we don't have the user accidentally change it on us.
tree_parents = [] if ancestors is None else ancestors[:]

self.parent_map = dict((c, p) for p in tree.getiterator() for c in p)
stack = [(tree, tree_parents)]

while stack:
currElement = stack.pop()
currElement, parents = stack.pop()

self.ancestors = parents
self.__build_ancestors(currElement, self.ancestors)

insertQueue = []
for child in currElement:
if child.text and not isinstance(
child.text, util.AtomicString
):
self.ancestors.append(child.tag.lower())
text = child.text
child.text = None
lst = self.__processPlaceholders(
self.__handleInline(text), child
)
for l in lst:
self.parent_map[l[0]] = child
stack += lst
insertQueue.append((child, lst))
self.ancestors.pop()
if child.tail:
tail = self.__handleInline(child.tail)
dumby = util.etree.Element('d')
Expand All @@ -306,9 +338,11 @@ def run(self, tree):
pos = list(currElement).index(child) + 1
tailResult.reverse()
for newChild in tailResult:
currElement.insert(pos, newChild)
self.parent_map[newChild[0]] = currElement
currElement.insert(pos, newChild[0])
if len(child):
stack.append(child)
self.parent_map[child] = currElement
stack.append((child, self.ancestors[:]))

for element, lst in insertQueue:
if self.markdown.enable_attributes:
Expand All @@ -317,7 +351,8 @@ def run(self, tree):
element.text, element
)
i = 0
for newChild in lst:
for obj in lst:
newChild = obj[0]
if self.markdown.enable_attributes:
# Processing attributes
if newChild.tail and isString(newChild.tail):
Expand Down
52 changes: 52 additions & 0 deletions tests/test_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,3 +770,55 @@ def testAppend(self):
self.assertEqual('|' in md.ESCAPED_CHARS, True)
md2 = markdown.Markdown()
self.assertEqual('|' not in md2.ESCAPED_CHARS, True)


class TestAncestorExclusion(unittest.TestCase):
    """Verify that inline patterns honour their `ANCESTOR_EXCLUDES` list."""

    class AncestorExample(markdown.inlinepatterns.SimpleTagPattern):
        """Simple tag pattern that must never match beneath an `a` tag."""

        ANCESTOR_EXCLUDES = ('a',)

        def handleMatch(self, m):
            """Build an element of the configured tag holding match group 3."""
            element = markdown.util.etree.Element(self.tag)
            element.text = m.group(3)
            return element

    class AncestorExtension(markdown.Extension):
        """Extension that registers `AncestorExample` as an inline pattern."""

        def __init__(self, *args, **kwargs):
            """Set an empty config; positional and keyword args are ignored."""
            self.config = {}

        def extendMarkdown(self, md, md_globals):
            """Register the `+text+` pattern under the key "ancestor-test"."""
            plus_pattern = TestAncestorExclusion.AncestorExample(
                r'(\+)([^\+]+)\2', 'strong'
            )
            md.inlinePatterns["ancestor-test"] = plus_pattern

    def setUp(self):
        """Create a Markdown instance with the ancestor extension loaded."""
        extension = TestAncestorExclusion.AncestorExtension()
        self.md = markdown.Markdown(extensions=[extension])

    def test_ancestors(self):
        """The pattern matches in plain text but not inside link text."""
        source = """
Some +test+ and a [+link+](http://test.com)
"""
        expected = """<p>Some <strong>test</strong> and a <a href="http://test.com">+link+</a></p>"""

        self.md.reset()
        self.assertEqual(self.md.convert(source), expected)

    def test_ancestors_tail(self):
        """The exclusion also applies to tail text nested inside a link."""
        source = """
[***+em+*+strong+**](http://test.com)
"""
        expected = """<p><a href="http://test.com"><strong><em>+em+</em>+strong+</strong></a></p>"""

        self.md.reset()
        self.assertEqual(self.md.convert(source), expected)

0 comments on commit de5c696

Please sign in to comment.