1# -*- coding: utf-8 -*-
2# markdown is released under the BSD license
3# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
4# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
5# Copyright 2004 Manfred Stienstra (the original version)
6#
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are met:
11#
12# *   Redistributions of source code must retain the above copyright
13#     notice, this list of conditions and the following disclaimer.
14# *   Redistributions in binary form must reproduce the above copyright
15#     notice, this list of conditions and the following disclaimer in the
16#     documentation and/or other materials provided with the distribution.
17# *   Neither the name of the <organization> nor the
18#     names of its contributors may be used to endorse or promote products
19#     derived from this software without specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
22# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
25# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31# POSSIBILITY OF SUCH DAMAGE.
32
33
34from __future__ import unicode_literals
35import re
36import sys
37
38
39"""
40Python 3 Stuff
41=============================================================================
42"""
# True when the running interpreter reports major version 3.
PY3 = sys.version_info[0] == 3

# Aliases for the string base type, the text type and the
# "code point -> character" function of the running interpreter.
if not PY3:
    string_type = basestring  # common base class of ``str`` and ``unicode``
    text_type = unicode
    int2str = unichr
else:
    string_type = str
    text_type = str
    int2str = chr
53
54
55"""
56Constants you might want to modify
57-----------------------------------------------------------------------------
58"""
59
# Matches, case-insensitively, a complete tag name that counts as block level.
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form"
    "|fieldset|iframe|math|hr|hr/|style|li|dt|dd|thead|tbody|tr|th|td"
    "|section|footer|header|group|figure|figcaption|aside|article|canvas"
    "|output|progress|video)$",
    re.IGNORECASE,
)

# Placeholders
STX = '\u0002'  # "Start of text" control character: opens a placeholder
ETX = '\u0003'  # "End of text" control character: closes a placeholder
# Inline placeholders look like: STX + "klzzwxh:" + <id> + ETX
INLINE_PLACEHOLDER_PREFIX = "".join((STX, "klzzwxh:"))
INLINE_PLACEHOLDER = "".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
# Captures the four-digit id embedded in an inline placeholder.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
AMP_SUBSTITUTE = "".join((STX, "amp", ETX))
73
74"""
75Constants you probably do not need to change
76-----------------------------------------------------------------------------
77"""
78
# Code point ranges, written as (first, last) pairs, for the scripts named
# next to each entry.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF), Syriac (0700-074F),
    # Arabic supplement (0750-077F), Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)
85
86# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
87# markdown.util import etree`).  Do not import it by yourself.
88
# Bind the module-level name ``etree``: try the C implementation first and
# fall back to the pure-Python implementation when that is not possible.
try: # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    # NOTE(review): VERSION is compared as a string, i.e. lexicographically,
    # not component by component -- confirm this is good enough for the
    # version strings that can occur here.
    if etree.VERSION < "1.0.5":
        # Raised inside the ``try`` on purpose: the ``except`` clause below
        # catches RuntimeError, so a too-old cElementTree triggers the
        # fallback instead of aborting the import.
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):
    # Use the Python implementation of ElementTree?
    # Note that ``etree.test_comment`` is not set on this path.
    import xml.etree.ElementTree as etree
    if etree.VERSION < "1.1":
        # Not guarded by any handler here, so this error propagates to
        # whoever imports this module.
        raise RuntimeError("ElementTree version 1.1 or higher is required")
101
102
103"""
104AUXILIARY GLOBAL FUNCTIONS
105=============================================================================
106"""
107
108
def isBlockLevel(tag):
    """Tell whether ``tag`` names a block level HTML element.

    For a string ``tag`` the result of matching it against
    ``BLOCK_LEVEL_ELEMENTS`` is returned (a match object, or ``None`` when
    the name does not match).  For anything that is not a string the
    result is ``False``.
    """
    if not isinstance(tag, string_type):
        # Some ElementTree tags are not strings; they never count as block
        # level.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
115
116"""
117MISC AUXILIARY CLASSES
118=============================================================================
119"""
120
class AtomicString(text_type):
    """Text that is marked as final and should not be processed any further."""
124
125
class Processor(object):
    """Object that can optionally hold a reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Remember ``markdown_instance`` as ``self.markdown`` when it is truthy.

        When the argument is omitted or falsy, no ``markdown`` attribute is
        set on the instance.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
130
131
class HtmlStash(object):
    """
    Storage for HTML segments that are pulled out of the document early
    on and stand in as place-holders until they are put back.
    """

    def __init__(self):
        """ Set up an empty stash. """
        self.html_counter = 0  # index handed to the next stored segment
        self.rawHtmlBlocks = []  # (html, safe) tuples in storage order

    def store(self, html, safe=False):
        """
        Keep an HTML segment so it can be reinserted later and hand back
        the placeholder string to put into the document in its place.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        entry = (html, safe)
        self.rawHtmlBlocks.append(entry)
        # The placeholder is built from the counter value *before* it is
        # advanced, so the first stored segment gets index 0.
        token = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return token

    def reset(self):
        """ Forget every stored segment and restart counting at zero. """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """ Build the placeholder string for the segment numbered `key`. """
        template = "%swzxhzdk:%d%s"
        return template % (STX, key, ETX)
168
169