1# markdown is released under the BSD license
2# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4# Copyright 2004 Manfred Stienstra (the original version)
5#
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are met:
10#
11# *   Redistributions of source code must retain the above copyright
12#     notice, this list of conditions and the following disclaimer.
13# *   Redistributions in binary form must reproduce the above copyright
14#     notice, this list of conditions and the following disclaimer in the
15#     documentation and/or other materials provided with the distribution.
16# *   Neither the name of the <organization> nor the
17#     names of its contributors may be used to endorse or promote products
18#     derived from this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30# POSSIBILITY OF SUCH DAMAGE.
31
32
33from __future__ import unicode_literals
34from __future__ import absolute_import
35from . import util
36from . import odict
37from . import inlinepatterns
38
39
def build_treeprocessors(md_instance, **kwargs):
    """
    Build the default treeprocessors for Markdown.

    Keyword arguments:

    * md_instance: the Markdown instance handed to each treeprocessor.
    * kwargs: accepted but not used by this function.

    Returns: an ordered dict holding the "inline" processor followed by the
    "prettify" processor.

    """
    defaults = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    registry = odict.OrderedDict()
    # Insertion order matters: it is the order of the returned ordered dict.
    for key, factory in defaults:
        registry[key] = factory(md_instance)
    return registry
46
47
def isString(s):
    """
    Tell whether `s` is an ordinary (non-atomic) string.

    Returns False for `util.AtomicString` instances, otherwise whether `s`
    is an instance of `util.string_type`.

    """
    if isinstance(s, util.AtomicString):
        # AtomicStrings are deliberately not reported as strings here.
        return False
    return isinstance(s, util.string_type)
53
54
class Treeprocessor(util.Processor):
    """
    Base class for processors that run on the ElementTree object before
    serialization.

    A Treeprocessor provides a "run" method which receives an ElementTree,
    modifies it as necessary and returns an ElementTree object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Process the tree rooted at `root`; meant to be overridden.

        Subclasses should implement this method. It takes a root
        ElementTree and may either return another ElementTree object, in
        which case the existing root ElementTree will be replaced, or modify
        the current tree and return None.

        This base implementation does nothing and returns None.
        """
        return None
74
75
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Nodes produced by inline patterns are kept in `self.stashed_nodes`
    (reset at the start of every `run`) and are represented in the text by
    placeholders, which are later expanded back into ElementTree elements.
    """

    def __init__(self, md):
        """ Cache the placeholder settings and keep the Markdown instance. """
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
                                      + len(self.__placeholder_suffix)
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        Keyword arguments:

        * type: not used by this method.

        Returns: tuple of (placeholder string, id), where the id is the
        zero-padded number of nodes stashed so far.

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing is found the id is None and the index is `index + 1`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        else:
            return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash.

        Returns: the placeholder string under which the node was stored.

        """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. AtomicString input is returned
        untouched.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.markdown.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # The same pattern is retried until it stops matching.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # list(node) instead of node.getchildren(): the latter was
            # removed from ElementTree in Python 3.9.
            pos = list(node).index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting the reversed list at a fixed position keeps the
        # original order of the new children.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        Plain text found between placeholders is attached to the tail of the
        last element in that list or, if the list is still empty, to
        `parent.text`.

        """
        def linkText(text):
            """ Append text after the last result, or to parent.text. """
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes.get(node_id)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        # Snapshot of the children: processing below may
                        # insert into / remove from `node`.
                        for child in [node] + list(node):
                            if child.tail and child.tail.strip():
                                self.__processElementText(node, child, False)
                            if child.text and child.text.strip():
                                self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    # Keep the prefix as literal text and search on after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of (string, matched flag, next start index). When the
        pattern matched and produced a node, the string has a placeholder in
        place of the matched text; otherwise the string is `data` unchanged.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # Nothing to stash: continue the search from the start of the
            # last group of this match.
            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                        patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                        patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # Iterate over a snapshot: tail processing below inserts new
            # siblings into currElement.
            for child in list(currElement):
                if child.text and not isinstance(child.text, util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(self.__handleInline(
                                                    text), child)
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that only collects the leading text.
                    dumby = util.etree.Element('d')
                    tailResult = self.__processPlaceholders(tail, dumby)
                    if dumby.text:
                        child.tail = dumby.text
                    else:
                        child.tail = None
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = \
                            inlinepatterns.handleAttributes(element.text,
                                                                    element)
                for i, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            newChild.tail = \
                                inlinepatterns.handleAttributes(newChild.tail,
                                                                    element)
                        if newChild.text and isString(newChild.text):
                            newChild.text = \
                                inlinepatterns.handleAttributes(newChild.text,
                                                                    newChild)
                    element.insert(i, newChild)
        return tree
355
356
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """
        Recursively add linebreaks to ElementTree children.

        For a block level element other than 'code' and 'pre', a linebreak
        becomes its text when that text is empty or whitespace and its first
        child is block level; its block level children are then processed
        the same way. For any element, an empty or whitespace-only tail is
        replaced by a linebreak.

        """
        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def _iterTag(self, root, tag):
        """
        Iterate over `root` and its descendants having the given tag.

        Uses `iter()` when the element provides it, because `getiterator()`
        was removed from ElementTree in Python 3.9; falls back to
        `getiterator()` for element implementations without `iter()`.

        """
        iterate = getattr(root, 'iter', None)
        if iterate is None:
            iterate = root.getiterator
        return iterate(tag)

    def run(self, root):
        """
        Add linebreaks to ElementTree root object.

        Modifies the tree in place and returns None.

        """

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in self._iterTag(root, 'br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in self._iterTag(root, 'pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # A <code> element without text has nothing to clean up;
                # calling rstrip() on None would raise AttributeError.
                if code.text is not None:
                    code.text = code.text.rstrip() + '\n'
393