Feature ancestry (#598)

Ancestry exclusion for inline patterns. Adds the ability for an inline pattern to define a list of ancestor tag names that should be avoided. If a pattern would create a descendant of one of the listed tag names, the pattern will not match. Fixes #596.
Python-Markdown · Nov 23, 2017 · de5c696 · de5c696
1 parent 007bd2a
commit de5c696
Show file tree

Hide file tree

Showing 5 changed files with 107 additions and 13 deletions.
diff --git a/.spell-dict b/.spell-dict
@@ -103,6 +103,7 @@ traceback
 Tredinnick
 Treeprocessor
 Treeprocessors
+tuple
 tuples
 unordered
 untrusted
@@ -122,4 +123,4 @@ wiki
 JavaScript
 plugin
 plugins
-configs
+configs
diff --git a/docs/extensions/api.txt b/docs/extensions/api.txt
@@ -53,7 +53,7 @@ A pseudo example:
 Inline Patterns {: #inlinepatterns }
 ------------------------------------
 
-Inline Patterns implement the inline HTML element syntax for Markdown such as
+Inline Patterns implement the inline HTML element syntax for Markdown such as 
 `*emphasis*` or `[links](http://example.com)`. Pattern objects should be 
 instances of classes that inherit from `markdown.inlinepatterns.Pattern` or 
 one of its children. Each pattern object uses a single regular expression and 
@@ -68,6 +68,10 @@ must have the following methods:
     Accepts a match object and returns an ElementTree element or a plain 
     Unicode string.
 
+Also, Inline Patterns can define the class attribute `ANCESTOR_EXCLUDES` as
+either a list or tuple of undesirable ancestor tag names. The pattern will not
+match if it would make the content a descendant of one of the listed tag names.
+
 Note that any regular expression returned by `getCompiledRegExp` must capture
 the whole block. Therefore, they should all start with `r'^(.*?)'` and end
 with `r'(.*?)!'`. When using the default `getCompiledRegExp()` method 

diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
@@ -189,6 +189,8 @@ def attributeCallback(match):
 class Pattern(object):
     """Base class that inline patterns subclass. """
 
+    ANCESTOR_EXCLUDES = tuple()
+
     def __init__(self, pattern, markdown_instance=None):
         """
         Create an instance of an inline pattern.

diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
@@ -54,6 +54,7 @@ def __init__(self, md):
         self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
         self.markdown = md
         self.inlinePatterns = md.inlinePatterns
+        self.ancestors = []
 
     def __makePlaceholder(self, type):
         """ Generate a placeholder """
@@ -138,7 +139,7 @@ def __processElementText(self, node, subnode, isText=True):
 
         childResult.reverse()
         for newChild in childResult:
-            node.insert(pos, newChild)
+            node.insert(pos, newChild[0])
 
     def __processPlaceholders(self, data, parent, isText=True):
         """
@@ -155,10 +156,10 @@ def __processPlaceholders(self, data, parent, isText=True):
         def linkText(text):
             if text:
                 if result:
-                    if result[-1].tail:
-                        result[-1].tail += text
+                    if result[-1][0].tail:
+                        result[-1][0].tail += text
                     else:
-                        result[-1].tail = text
+                        result[-1][0].tail = text
                 elif not isText:
                     if parent.tail:
                         parent.tail += text
@@ -199,7 +200,7 @@ def linkText(text):
                         continue
 
                     strartIndex = phEndIndex
-                    result.append(node)
+                    result.append((node, self.ancestors[:]))
 
                 else:  # wrong placeholder
                     end = index + len(self.__placeholder_prefix)
@@ -230,6 +231,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
         Returns: String with placeholders instead of ElementTree elements.
 
         """
+
+        for exclude in pattern.ANCESTOR_EXCLUDES:
+            if exclude.lower() in self.ancestors:
+                return data, False, 0
+
         match = pattern.getCompiledRegExp().match(data[startIndex:])
         leftData = data[:startIndex]
 
@@ -247,9 +253,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
                 for child in [node] + list(node):
                     if not isString(node):
                         if child.text:
+                            self.ancestors.append(child.tag.lower())
                             child.text = self.__handleInline(
                                 child.text, patternIndex + 1
                             )
+                            self.ancestors.pop()
                         if child.tail:
                             child.tail = self.__handleInline(
                                 child.tail, patternIndex
@@ -261,7 +269,17 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
                              match.group(1),
                              placeholder, match.groups()[-1]), True, 0
 
-    def run(self, tree):
+    def __build_ancestors(self, parent, parents):
+        """Build the ancestor list."""
+        ancestors = []
+        while parent:
+            if parent:
+                ancestors.append(parent.tag.lower())
+            parent = self.parent_map.get(parent)
+        ancestors.reverse()
+        parents.extend(ancestors)
+
+    def run(self, tree, ancestors=None):
         """Apply inline patterns to a parsed Markdown tree.
 
         Iterate over ElementTree, find elements with inline tag, apply inline
@@ -274,28 +292,42 @@ def run(self, tree):
         Arguments:
 
         * tree: ElementTree object, representing Markdown tree.
+        * ancestors: List of parent tag names that precede the tree node (if needed).
 
         Returns: ElementTree object with applied inline patterns.
 
         """
         self.stashed_nodes = {}
 
-        stack = [tree]
+        # Ensure a valid parent list, but copy passed in lists
+        # to ensure we don't have the user accidentally change it on us.
+        tree_parents = [] if ancestors is None else ancestors[:]
+
+        self.parent_map = dict((c, p) for p in tree.getiterator() for c in p)
+        stack = [(tree, tree_parents)]
 
         while stack:
-            currElement = stack.pop()
+            currElement, parents = stack.pop()
+
+            self.ancestors = parents
+            self.__build_ancestors(currElement, self.ancestors)
+
             insertQueue = []
             for child in currElement:
                 if child.text and not isinstance(
                     child.text, util.AtomicString
                 ):
+                    self.ancestors.append(child.tag.lower())
                     text = child.text
                     child.text = None
                     lst = self.__processPlaceholders(
                         self.__handleInline(text), child
                     )
+                    for l in lst:
+                        self.parent_map[l[0]] = child
                     stack += lst
                     insertQueue.append((child, lst))
+                    self.ancestors.pop()
                 if child.tail:
                     tail = self.__handleInline(child.tail)
                     dumby = util.etree.Element('d')
@@ -306,9 +338,11 @@ def run(self, tree):
                     pos = list(currElement).index(child) + 1
                     tailResult.reverse()
                     for newChild in tailResult:
-                        currElement.insert(pos, newChild)
+                        self.parent_map[newChild[0]] = currElement
+                        currElement.insert(pos, newChild[0])
                 if len(child):
-                    stack.append(child)
+                    self.parent_map[child] = currElement
+                    stack.append((child, self.ancestors[:]))
 
             for element, lst in insertQueue:
                 if self.markdown.enable_attributes:
@@ -317,7 +351,8 @@ def run(self, tree):
                             element.text, element
                         )
                 i = 0
-                for newChild in lst:
+                for obj in lst:
+                    newChild = obj[0]
                     if self.markdown.enable_attributes:
                         # Processing attributes
                         if newChild.tail and isString(newChild.tail):

diff --git a/tests/test_apis.py b/tests/test_apis.py
@@ -770,3 +770,55 @@ def testAppend(self):
         self.assertEqual('|' in md.ESCAPED_CHARS, True)
         md2 = markdown.Markdown()
         self.assertEqual('|' not in md2.ESCAPED_CHARS, True)
+
+
+class TestAncestorExclusion(unittest.TestCase):
+    """ Tests exclusion of tags in ancestor list. """
+
+    class AncestorExample(markdown.inlinepatterns.SimpleTagPattern):
+        """ Ancestor Test. """
+
+        ANCESTOR_EXCLUDES = ('a',)
+
+        def handleMatch(self, m):
+            """ Handle match. """
+            el = markdown.util.etree.Element(self.tag)
+            el.text = m.group(3)
+            return el
+
+    class AncestorExtension(markdown.Extension):
+
+        def __init__(self, *args, **kwargs):
+            """Initialize."""
+
+            self.config = {}
+
+        def extendMarkdown(self, md, md_globals):
+            """Modify inline patterns."""
+
+            pattern = r'(\+)([^\+]+)\2'
+            md.inlinePatterns["ancestor-test"] = TestAncestorExclusion.AncestorExample(pattern, 'strong')
+
+    def setUp(self):
+        """Setup markdown object."""
+        self.md = markdown.Markdown(extensions=[TestAncestorExclusion.AncestorExtension()])
+
+    def test_ancestors(self):
+        """ Test that an extension can exclude parent tags. """
+        test = """
+Some +test+ and a [+link+](http://test.com)
+"""
+        result = """<p>Some <strong>test</strong> and a <a href="http://test.com">+link+</a></p>"""
+
+        self.md.reset()
+        self.assertEqual(self.md.convert(test), result)
+
+    def test_ancestors_tail(self):
+        """ Test that an extension can exclude parent tags when dealing with a tail. """
+        test = """
+[***+em+*+strong+**](http://test.com)
+"""
+        result = """<p><a href="http://test.com"><strong><em>+em+</em>+strong+</strong></a></p>"""
+
+        self.md.reset()
+        self.assertEqual(self.md.convert(test), result)