1f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek"""
2f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted KremenekPython Markdown
3f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek===============
4f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
5f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted KremenekPython Markdown converts Markdown to HTML and can be used as a library or
6f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenekcalled from the command line.
7f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
8f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek## Basic usage as a module:
9f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
10f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    import markdown
11f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    md = Markdown()
12f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    html = md.convert(your_text_string)
13f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
14d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks## Basic use from the command line:
15d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
16e6348c336fecc8da9288ea367375a1b1cd2358d2Argyrios Kyrtzidis    markdown source.txt > destination.html
17f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
18f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted KremenekRun "markdown --help" to see more options.
19802be99a6817aba6edb166b93c133da4358aa783Zhongxing Xu
20f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek## Extensions
21efceabd2380f49306bc0229583458e93b062094bDaniel Dunbar
22aa5609891df937291bf962dd2fc7deb2ceae292fAnna ZaksSee <http://www.freewisdom.org/projects/python-markdown/> for more
23efceabd2380f49306bc0229583458e93b062094bDaniel Dunbarinformation and instructions on how to extend the functionality of
24d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna ZaksPython Markdown.  Read that before you try modifying this file.
25371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek
2627af04bcca46f8a3374586be1301477f9123f5e1Argyrios Kyrtzidis## Authors and License
2743dee220252ef0b42c5f8a3bb1eca97f84f2565fArgyrios Kyrtzidis
2821142581d55918beed544a757e4af3bb865b1812Ted KremenekStarted by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
299b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenekmaintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
309b663716449b618ba0390b1dbebc54fa8e971124Ted KremenekLimberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
319b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek
329b663716449b618ba0390b1dbebc54fa8e971124Ted KremenekContact: markdown@freewisdom.org
33f39d962cf84f46d2c0512157259ae1d41a1a5173David Blaikie
34a7af5ea88a6c5bdf87497cca6c20831e8c546751Argyrios KyrtzidisCopyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
35efceabd2380f49306bc0229583458e93b062094bDaniel DunbarCopyright 200? Django Software Foundation (OrderedDict implementation)
36efceabd2380f49306bc0229583458e93b062094bDaniel DunbarCopyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
379b414d3e2d0cb84512b55a3275a98490b090162aDaniel DunbarCopyright 2004 Manfred Stienstra (the original version)
38efceabd2380f49306bc0229583458e93b062094bDaniel Dunbar
39f8ce6991f41d55b6e8526b7a7919771428e2b181Ted KremenekLicense: BSD (see docs/LICENSE for details).
4003013fa9a0bf1ef4b907f5fec006c8f4000fdd21Michael J. Spencer"""
4103013fa9a0bf1ef4b907f5fec006c8f4000fdd21Michael J. Spencer
42d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaksversion = "2.0.3"
43d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaksversion_info = (2,0,3, "Final")
446cb7c1a43b0c8f739d1f54b7fdae5ede86033496Benjamin Kramer
456a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaksimport re
4681fb169f42769e02c7425b23885a261c025fd5e6Anna Zaksimport codecs
47db09a4dee28a4515438af60f2d2b4a83e4965c31Ted Kremenekimport sys
487fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaksimport warnings
497fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaksimport logging
50f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenekfrom logging import DEBUG, INFO, WARN, ERROR, CRITICAL
519ef6537a894c33003359b1f9b9676e9178e028b7Ted Kremenek
52d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
53f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek"""
54c5619d901a68dc27a9e310a6a831f03efebcd950Zhongxing XuCONSTANTS
55ff944a8c481d6c0f1ad2633e4be9bf8b1dd2a09fZhongxing Xu=============================================================================
563fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks"""
57d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
58e62f048960645b79363408fdead53fec2a063c52Anna Zaks"""
59e62f048960645b79363408fdead53fec2a063c52Anna ZaksConstants you might want to modify
60e62f048960645b79363408fdead53fec2a063c52Anna Zaks-----------------------------------------------------------------------------
61d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks"""
62be1fe1eb12a1cb91c8e3a9fcc2db4dfe989def6cTed Kremenek
63f39d962cf84f46d2c0512157259ae1d41a1a5173David Blaikie# default logging level for command-line use
64f75560670bcdd59b051149bdece3eac14e313853Ted KremenekCOMMAND_LINE_LOGGING_LEVEL = CRITICAL
65f75560670bcdd59b051149bdece3eac14e313853Ted KremenekTAB_LENGTH = 4               # expand tabs to this many spaces
66ef3643fbbbf66247c5e205497fae0f46e240c143David BlaikieENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
67ef3643fbbbf66247c5e205497fae0f46e240c143David BlaikieSMART_EMPHASIS = True        # this_or_that does not become this<i>or</i>that
68efceabd2380f49306bc0229583458e93b062094bDaniel DunbarDEFAULT_OUTPUT_FORMAT = 'xhtml1'     # xhtml or html4 output
69ef3643fbbbf66247c5e205497fae0f46e240c143David BlaikieHTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
70ef3643fbbbf66247c5e205497fae0f46e240c143David BlaikieBLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
71ef3643fbbbf66247c5e205497fae0f46e240c143David Blaikie                                  "|script|noscript|form|fieldset|iframe|math"
72f75560670bcdd59b051149bdece3eac14e313853Ted Kremenek                                  "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
73f75560670bcdd59b051149bdece3eac14e313853Ted Kremenek                                  "|tr|th|td")
74f75560670bcdd59b051149bdece3eac14e313853Ted KremenekDOC_TAG = "div"     # Element used to wrap document - later removed
75f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
76f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek# Placeholders
77f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted KremenekSTX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
78f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted KremenekETX = u'\u0003'  # Use ETX ("End of text") for end-of-placeholder
79f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted KremenekINLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
80aa5609891df937291bf962dd2fc7deb2ceae292fAnna ZaksINLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
81aa5609891df937291bf962dd2fc7deb2ceae292fAnna ZaksAMP_SUBSTITUTE = STX+"amp"+ETX
82aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
83aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
84aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks"""
85aa5609891df937291bf962dd2fc7deb2ceae292fAnna ZaksConstants you probably do not need to change
86aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks-----------------------------------------------------------------------------
87aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks"""
88aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
89aa5609891df937291bf962dd2fc7deb2ceae292fAnna ZaksRTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
90aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                     # Hebrew (0590-05FF), Arabic (0600-06FF),
91aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                     # Syriac (0700-074F), Arabic supplement (0750-077F),
92aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                     # Thaana (0780-07BF), Nko (07C0-07FF).
93ed8afacb8118b71bcfa8017059e51da325e7691bZhongxing Xu                    (u'\u2D30', u'\u2D7F'), # Tifinagh
949c378f705405d37f49795d5e915989de774fe11fTed Kremenek                    )
951d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
961d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
971d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek"""
9808b86531ade68727c56918f162816075b87c864aJordy RoseAUXILIARY GLOBAL FUNCTIONS
99d07a0d0279c09d1017f8450fce575a94dc9703c0Zhongxing Xu=============================================================================
1006a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks"""
1016a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
1026a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
1036a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaksdef message(level, text):
1046a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks    """ A wrapper method for logging debug messages. """
1056a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks    logger =  logging.getLogger('MARKDOWN')
106cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek    if logger.handlers:
1071d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek        # The logger is configured
108ef3643fbbbf66247c5e205497fae0f46e240c143David Blaikie        logger.log(level, text)
109d07a0d0279c09d1017f8450fce575a94dc9703c0Zhongxing Xu        if level > WARN:
1101d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek            sys.exit(0)
1111d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek    elif level > WARN:
112f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek        raise MarkdownException, text
1136f42b62b6194f53bcbc349f5d17388e1936535d7Dylan Noblesmith    else:
1146f42b62b6194f53bcbc349f5d17388e1936535d7Dylan Noblesmith        warnings.warn(text, MarkdownWarning)
115c471e7b44e63ff1b46b480e723c4130aeaef5a8aZhongxing Xu
116d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
117d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaksdef isBlockLevel(tag):
118d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks    """Check if the tag is a block level HTML tag."""
1193bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks    return BLOCK_LEVEL_ELEMENTS.match(tag)
1203bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks
1213bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks"""
1223bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna ZaksMISC AUXILIARY CLASSES
1231d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek=============================================================================
1241d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek"""
12508b86531ade68727c56918f162816075b87c864aJordy Rose
12608b86531ade68727c56918f162816075b87c864aJordy Roseclass AtomicString(unicode):
127aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks    """A string which should not be further processed."""
128aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks    pass
1291d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
130d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
131d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaksclass MarkdownException(Exception):
132d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks    """ A Markdown Exception. """
133d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks    pass
134d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
135d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
136d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaksclass MarkdownWarning(Warning):
137d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks    """ A Markdown Warning. """
138d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks    pass
1391d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
140fda7832b000ff8927386f093b52c067641679469Zhongxing Xu
1411d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek"""
142ef3643fbbbf66247c5e205497fae0f46e240c143David BlaikieOVERALL DESIGN
1431d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek=============================================================================
1441d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
1451d9cbeb76cf4c36acf5545028e2b2ac207086442Ted KremenekMarkdown processing takes place in four steps:
146fda7832b000ff8927386f093b52c067641679469Zhongxing Xu
1471d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek1. A bunch of "preprocessors" munge the input text.
148fda7832b000ff8927386f093b52c067641679469Zhongxing Xu2. BlockParser() parses the high-level structural elements of the
149fda7832b000ff8927386f093b52c067641679469Zhongxing Xu   pre-processed text into an ElementTree.
150a599ae8826b01c0160a519b0fc5a4871f599bf04Argyrios Kyrtzidis3. A bunch of "treeprocessors" are run against the ElementTree. One such
151a599ae8826b01c0160a519b0fc5a4871f599bf04Argyrios Kyrtzidis   treeprocessor runs InlinePatterns against the ElementTree, detecting inline
152a599ae8826b01c0160a519b0fc5a4871f599bf04Argyrios Kyrtzidis   markup.
153ef3643fbbbf66247c5e205497fae0f46e240c143David Blaikie4. Some post-processors are run against the text after the ElementTree has
1541d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek   been serialized into text.
155fda7832b000ff8927386f093b52c067641679469Zhongxing Xu5. The output is written to a string.
1561d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
1575f83d6f36a7308eef21d87104fd70c421e854448Argyrios KyrtzidisThose steps are put together by the Markdown() class.
1585f83d6f36a7308eef21d87104fd70c421e854448Argyrios Kyrtzidis
159b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikie"""
160fda7832b000ff8927386f093b52c067641679469Zhongxing Xu
1615f83d6f36a7308eef21d87104fd70c421e854448Argyrios Kyrtzidisimport preprocessors
162fda7832b000ff8927386f093b52c067641679469Zhongxing Xuimport blockprocessors
1631d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenekimport treeprocessors
1641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpimport inlinepatterns
1655f83d6f36a7308eef21d87104fd70c421e854448Argyrios Kyrtzidisimport postprocessors
1665f83d6f36a7308eef21d87104fd70c421e854448Argyrios Kyrtzidisimport blockparser
167b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikieimport etree_loader
168fda7832b000ff8927386f093b52c067641679469Zhongxing Xuimport odict
1695f83d6f36a7308eef21d87104fd70c421e854448Argyrios Kyrtzidis
170fda7832b000ff8927386f093b52c067641679469Zhongxing Xu# Extensions should use "markdown.etree" instead of "etree" (or do `from
171fda7832b000ff8927386f093b52c067641679469Zhongxing Xu# markdown import etree`).  Do not import it by yourself.
1721d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
173f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremeneketree = etree_loader.importETree()
174aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
175c4a1437c15da43eb8d2601cdce13161ef41a4389Ted Kremenek# Adds the ability to output html4
1761d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenekimport html4
177f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek
178fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek
179fc576514d06c46a7cac49500169411d82f38d04bTed Kremenekclass Markdown:
180cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor    """Convert Markdown to HTML."""
181aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
182aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks    def __init__(self,
183aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                 extensions=[],
184aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                 extension_configs={},
185aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                 safe_mode = False,
186aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                 output_format=DEFAULT_OUTPUT_FORMAT):
187aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        """
188cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor        Creates a new Markdown instance.
189cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor
190b8989f27f116ff2400e92a52c067a69846119eb5Benjamin Kramer        Keyword arguments:
191cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor
192cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor        * extensions: A list of extensions.
193cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor           If they are of type string, the module mdx_name.py will be loaded.
194cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor           If they are a subclass of markdown.Extension, they will be used
195cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor           as-is.
196cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor        * extension-configs: Configuration setting for extensions.
197cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
198cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor        * output_format: Format of output. Supported formats are:
199cb7b1e17b63967317ab5cc55682168cf0380519aDouglas Gregor            * "xhtml1": Outputs XHTML 1.x. Default.
20035fa76d0bb6fb8c86159a7506efd094a4fe376d2Ted Kremenek            * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
2011d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek            * "html4": Outputs HTML 4
2021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            * "html": Outputs latest supported version of HTML (currently HTML 4).
2031d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek            Note that it is suggested that the more specific formats ("xhtml1"
2041d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek            and "html4") be used as "xhtml" or "html" may change in the future
2054e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie            if it makes sense at that time.
20608b86531ade68727c56918f162816075b87c864aJordy Rose
2071d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek        """
2084e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie
2091d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek        self.safeMode = safe_mode
21043dee220252ef0b42c5f8a3bb1eca97f84f2565fArgyrios Kyrtzidis        self.registeredExtensions = []
2116362b893731ccf4480a96527db9e55e04b801503Zhongxing Xu        self.docType = ""
2121d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek        self.stripTopLevelTags = True
213d30952838421ddfb9f7e346b2ba8213889a5f789Anna Zaks
21466253352131e3e7a22b3bfd0e180607aa2bfb988Anna Zaks        # Preprocessors
2159121ba232903ebe61e7bbe14ca294cf0f07dfa96Marcin Swiderski        self.preprocessors = odict.OrderedDict()
216d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek        self.preprocessors["html_block"] = \
2178235f9c9c8b3d1737d1c6bd57f7ba3f616b92392Anna Zaks                preprocessors.HtmlBlockPreprocessor(self)
21866253352131e3e7a22b3bfd0e180607aa2bfb988Anna Zaks        self.preprocessors["reference"] = \
2198235f9c9c8b3d1737d1c6bd57f7ba3f616b92392Anna Zaks                preprocessors.ReferencePreprocessor(self)
22066253352131e3e7a22b3bfd0e180607aa2bfb988Anna Zaks        # footnote preprocessor will be inserted with "<reference"
2215903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks
222b47dbcbc12430fdf3e5a5b9f59cdec5480e89e75Anna Zaks        # Block processors - ran by the parser
2231d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek        self.parser = blockparser.BlockParser()
2241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        self.parser.blockprocessors['empty'] = \
2256a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                blockprocessors.EmptyBlockProcessor(self.parser)
2266a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.parser.blockprocessors['indent'] = \
2276a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                blockprocessors.ListIndentProcessor(self.parser)
2286a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.parser.blockprocessors['code'] = \
2296a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                blockprocessors.CodeBlockProcessor(self.parser)
2301d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek        self.parser.blockprocessors['hashheader'] = \
23114cc9451de4a9539bf79e4e5d63248c2377426dbTed Kremenek                blockprocessors.HashHeaderProcessor(self.parser)
2326a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.parser.blockprocessors['setextheader'] = \
2336a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                blockprocessors.SetextHeaderProcessor(self.parser)
234694a942b0a14e02757a695142c45437eec6e3684Anna Zaks        self.parser.blockprocessors['hr'] = \
235aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                blockprocessors.HRProcessor(self.parser)
236aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.parser.blockprocessors['olist'] = \
237aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                blockprocessors.OListProcessor(self.parser)
238aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.parser.blockprocessors['ulist'] = \
239aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                blockprocessors.UListProcessor(self.parser)
240aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.parser.blockprocessors['quote'] = \
241aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                blockprocessors.BlockQuoteProcessor(self.parser)
2426a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.parser.blockprocessors['paragraph'] = \
2436a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                blockprocessors.ParagraphProcessor(self.parser)
244aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
2456a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
2466a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        #self.prePatterns = []
2476a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
248aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        # Inline patterns - Run on the tree
249aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns = odict.OrderedDict()
250c8848f34bbde083b8d89f445eb605eaabf30d6a8Anna Zaks        self.inlinePatterns["backtick"] = \
251aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
252aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["escape"] = \
253aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
254aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["reference"] = \
255aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
256aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["link"] = \
257aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
258aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["image_link"] = \
259aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
260aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["image_reference"] = \
261aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
262aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["autolink"] = \
263aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
264aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["automail"] = \
265aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
266aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["linebreak2"] = \
267aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
268aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["linebreak"] = \
269aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
270aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["html"] = \
271aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
272aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["entity"] = \
273aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
274aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["not_strong"] = \
275aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
276aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.inlinePatterns["strong_em"] = \
277aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
2786a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.inlinePatterns["strong"] = \
2796a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
2806a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.inlinePatterns["emphasis"] = \
2816a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
2826a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.inlinePatterns["emphasis2"] = \
2836a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
2846a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        # The order of the handlers matters!!!
2851d9cbeb76cf4c36acf5545028e2b2ac207086442Ted Kremenek
286f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
287f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek        # Tree processors - run once we have a basic parse.
288aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        self.treeprocessors = odict.OrderedDict()
289f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
290f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek        self.treeprocessors["prettify"] = \
291f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek                treeprocessors.PrettifyTreeprocessor(self)
292d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
293f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek        # Postprocessors - finishing touches.
2946a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.postprocessors = odict.OrderedDict()
2956a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.postprocessors["raw_html"] = \
2966a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                postprocessors.RawHtmlPostprocessor(self)
2976a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.postprocessors["amp_substitute"] = \
2986a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                postprocessors.AndSubstitutePostprocessor()
2996a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        # footnote postprocessor will be inserted with ">amp_substitute"
3006a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
3016a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        # Map format keys to serializers
3026a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.output_formats = {
3036a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            'html'  : html4.to_html_string,
3046a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            'html4' : html4.to_html_string,
3056a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            'xhtml' : etree.tostring,
3066a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks            'xhtml1': etree.tostring,
3076a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        }
3086a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
3096a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.references = {}
3106a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.htmlStash = preprocessors.HtmlStash()
311577f14a34457032523e59dbbbacb88ca2cd4db57Ted Kremenek        self.registerExtensions(extensions = extensions,
3126a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                                configs = extension_configs)
3136a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.set_output_format(output_format)
3146a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        self.reset()
315694a942b0a14e02757a695142c45437eec6e3684Anna Zaks
316d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks    def registerExtensions(self, extensions, configs):
317d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks        """
318d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks        Register extensions with this instance of Markdown.
319eaa069075f060f58840af03e0bd5bd75bb27e809Anna Zaks
3206a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        Keyword aurguments:
321694a942b0a14e02757a695142c45437eec6e3684Anna Zaks
322694a942b0a14e02757a695142c45437eec6e3684Anna Zaks        * extensions: A list of extensions, which can either
323694a942b0a14e02757a695142c45437eec6e3684Anna Zaks           be strings or objects.  See the docstring on Markdown.
324694a942b0a14e02757a695142c45437eec6e3684Anna Zaks        * configs: A dictionary mapping module names to config options.
325eaa069075f060f58840af03e0bd5bd75bb27e809Anna Zaks
326eaa069075f060f58840af03e0bd5bd75bb27e809Anna Zaks        """
327d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks        for ext in extensions:
328d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks            if isinstance(ext, basestring):
329d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks                ext = load_extension(ext, configs.get(ext, []))
3303fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks            if isinstance(ext, Extension):
331b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks                try:
332b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks                    ext.extendMarkdown(self, globals())
333b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks                except NotImplementedError, e:
334b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks                    message(ERROR, e)
335b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks            else:
336d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks                message(ERROR, 'Extension "%s.%s" must be of type: "markdown.Extension".' \
337d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks                    % (ext.__class__.__module__, ext.__class__.__name__))
3383fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks
339d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks    def registerExtension(self, extension):
3403fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks        """ This gets called by the extension """
3413fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks        self.registeredExtensions.append(extension)
342d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
343b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks    def reset(self):
344b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks        """
345b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks        Resets all state variables so that we can start with a new text.
346b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks        """
347cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek        self.htmlStash.reset()
348b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks        self.references.clear()
349b990d039c7e01ad0055dcbd1e13a691813397b96Anna Zaks
3507fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        for extension in self.registeredExtensions:
351371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek            extension.reset()
352d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
3537fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks    def set_output_format(self, format):
3547fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        """ Set the output format for the class instance. """
355d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks        try:
356d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks            self.serializer = self.output_formats[format.lower()]
3577fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        except KeyError:
3587fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks            message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \
359cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek                               % (format, self.output_formats.keys()))
3607fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks
361371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek    def convert(self, source):
362371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek        """
363371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek        Convert markdown to serialized XHTML or HTML.
364371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek
365371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek        Keyword arguments:
366371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek
367371b477836f289f2e9caaab58530f187b51bc86dTed Kremenek        * source: Source text as a Unicode string.
3687fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks
3697fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        """
3707fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks
3717fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        # Fixup the source text
3727fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        if not source.strip():
3737fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks            return u""  # a blank unicode string
3746a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        try:
3757fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks            source = unicode(source)
3767fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        except UnicodeDecodeError:
3777fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
3787fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks            return u""
3797fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks
3807fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        source = source.replace(STX, "").replace(ETX, "")
381cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
382cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek        source = re.sub(r'\n\s+\n', '\n\n', source)
3837fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        source = source.expandtabs(TAB_LENGTH)
3847fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks
3857fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        # Split into lines and run the line preprocessors.
3867fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        self.lines = source.split("\n")
3877fe8dcef71ae56e43fd7df345db2895f84f2d0caAnna Zaks        for prep in self.preprocessors.values():
388d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks            self.lines = prep.run(self.lines)
389fee618af5dd7dee2caaa7347b372eb3dc5fdeffcTed Kremenek
390fee618af5dd7dee2caaa7347b372eb3dc5fdeffcTed Kremenek        # Parse the high-level elements.
39114cc9451de4a9539bf79e4e5d63248c2377426dbTed Kremenek        root = self.parser.parseDocument(self.lines).getroot()
392aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
393aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        # Run the tree-processors
394aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        for treeprocessor in self.treeprocessors.values():
395aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            newRoot = treeprocessor.run(root)
396aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            if newRoot:
397c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks                root = newRoot
398d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
399d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks        # Serialize _properly_.  Strip top-level tags.
400c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
401c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks        if self.stripTopLevelTags:
402c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks            try:
403c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks                start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
404aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                end = output.rindex('</%s>'%DOC_TAG)
405aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                output = output[start:end].strip()
406aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks            except ValueError:
407aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                if output.strip().endswith('<%s />'%DOC_TAG):
408aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                    # We have an empty document
409aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks                    output = ''
4106a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                else:
4116a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks                    # We have a serious problem
412cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek                    message(CRITICAL, 'Failed to strip top level tags.')
413577f14a34457032523e59dbbbacb88ca2cd4db57Ted Kremenek
414577f14a34457032523e59dbbbacb88ca2cd4db57Ted Kremenek        # Run the text post-processors
415577f14a34457032523e59dbbbacb88ca2cd4db57Ted Kremenek        for pp in self.postprocessors.values():
416577f14a34457032523e59dbbbacb88ca2cd4db57Ted Kremenek            output = pp.run(output)
417694a942b0a14e02757a695142c45437eec6e3684Anna Zaks
418694a942b0a14e02757a695142c45437eec6e3684Anna Zaks        return output.strip()
419577f14a34457032523e59dbbbacb88ca2cd4db57Ted Kremenek
420cb0a5039c243f5b0c178e70f424adac334e5789bTed Kremenek    def convertFile(self, input=None, output=None, encoding=None):
421aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        """Converts a markdown file and returns the HTML as a unicode string.
422aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
423694a942b0a14e02757a695142c45437eec6e3684Anna Zaks        Decodes the file using the provided encoding (defaults to utf-8),
424b317f8f5ca8737a5bbad97a3f7566a2dbd2ed61bZhongxing Xu        passes the file content to markdown, and outputs the html to either
425c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks        the provided stream or the file with provided name, using the same
426c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks        encoding as the source file.
427aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
428aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        **Note:** This is the only place that decoding and encoding of unicode
429c5bdc556f6a65c677e0ed73f918c3000ecad33afAnna Zaks        takes place in Python-Markdown.  (All other code is unicode-in /
4309be6e7ce5788e50c62d40c59b0bbc2ea423683f7Ted Kremenek        unicode-out.)
431ef3643fbbbf66247c5e205497fae0f46e240c143David Blaikie
432690a7f431d6863a101711e67636d51ddd13f35c5Ted Kremenek        Keyword arguments:
433ef3643fbbbf66247c5e205497fae0f46e240c143David Blaikie
434da17fd50ad485fd2a1fc5c2f055caacf532992daZhongxing Xu        * input: Name of source text file.
435d07a0d0279c09d1017f8450fce575a94dc9703c0Zhongxing Xu        * output: Name of output file. Writes to stdout if `None`.
436d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks        * encoding: Encoding of input and output files. Defaults to utf-8.
437d38f79543136ba68cd14b1dab0856474df1fbfd5Anna Zaks
4386b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks        """
4396b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks
4406b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks        encoding = encoding or "utf-8"
4416b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks
4426b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks        # Read the source
4436b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks        input_file = codecs.open(input, mode="r", encoding=encoding)
4446b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks        text = input_file.read()
4456b77ce8824cf62c2cfb61cf2d801eb3fcfbecffeAnna Zaks        input_file.close()
446db09a4dee28a4515438af60f2d2b4a83e4965c31Ted Kremenek        text = text.lstrip(u'\ufeff') # remove the byte-order mark
447db09a4dee28a4515438af60f2d2b4a83e4965c31Ted Kremenek
4485f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner        # Convert
449fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek        html = self.convert(text)
450fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek
451f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek        # Write to file or stdout
452fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek        if isinstance(output, (str, unicode)):
453fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek            output_file = codecs.open(output, "w", encoding=encoding)
454fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek            output_file.write(html)
455fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek            output_file.close()
456fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek        else:
457fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek            output.write(html.encode(encoding))
458d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
459d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
460d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks"""
461d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna ZaksExtensions
462d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks-----------------------------------------------------------------------------
463d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks"""
464d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
465d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaksclass Extension:
466d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks    """ Base class for extensions to subclass. """
467d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks    def __init__(self, configs = {}):
468d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks        """Create an instance of an Extention.
469d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
47098520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks        Keyword arguments:
471d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks
472d95e0b830ed031f2ea0e15e3679cd51b9bf23a9cAnna Zaks        * configs: A dict of configuration setting used by an Extension.
47398520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks        """
4741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        self.config = configs
475f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
476f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    def getConfig(self, key):
477fcd783d583d270b7ec1ec3e0fcf83cd93d30e381Ted Kremenek        """ Return a setting for the given key or an empty string. """
478402785357ab053dd53f4fdd858b9630a5e0f8badChandler Carruth        if key in self.config:
479fcd783d583d270b7ec1ec3e0fcf83cd93d30e381Ted Kremenek            return self.config[key][0]
48098520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks        else:
48198520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks            return ""
48298520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks
48398520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks    def getConfigInfo(self):
48498520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks        """ Return all config settings as a list of tuples. """
485aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks        return [(key, self.config[key][1]) for key in self.config.keys()]
4866a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks
48798520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks    def setConfig(self, key, value):
4881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        """ Set a config setting for `key` with the given `value`. """
489f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek        self.config[key][0] = value
490aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
49198520835eb1aa091429afa06e9f4f7ebe3864d34Anna Zaks    def extendMarkdown(self, md, md_globals):
4921d26f48dc2eea1c07431ca1519d7034a21b9bcffTed Kremenek        """
49358f5ec7d56b1ebf5f90ee11226ebe7663f2821eaTed Kremenek        Add the various proccesors and patterns to the Markdown Instance.
494f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek
4951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        This method must be overriden by every extension.
4965f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner
497fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek        Keyword arguments:
498f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek
49906a54a38be5054c910ffc92db60edab23f9ea105Argyrios Kyrtzidis        * md: The Markdown instance.
500fc576514d06c46a7cac49500169411d82f38d04bTed Kremenek
501f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek        * md_globals: Global variables in the markdown module namespace.
5029fb9474c5b267400d4abfbff63c8b39f378235d4Argyrios Kyrtzidis
5035f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner        """
5049fb9474c5b267400d4abfbff63c8b39f378235d4Argyrios Kyrtzidis        raise NotImplementedError, 'Extension "%s.%s" must define an "extendMarkdown"' \
505d655ab28fdf7c940d3f79f8f287954d7f76e0977Argyrios Kyrtzidis            'method.' % (self.__class__.__module__, self.__class__.__name__)
506aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
507aa5609891df937291bf962dd2fc7deb2ceae292fAnna Zaks
5081f5171edc9fb9b0698a75fa981c29a750b0a6082Anna Zaksdef load_extension(ext_name, configs = []):
5093fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks    """Load extension by name, then return the module.
5101f5171edc9fb9b0698a75fa981c29a750b0a6082Anna Zaks
5111f5171edc9fb9b0698a75fa981c29a750b0a6082Anna Zaks    The extension name may contain arguments as part of the string in the
512d655ab28fdf7c940d3f79f8f287954d7f76e0977Argyrios Kyrtzidis    following format: "extname(key1=value1,key2=value2)"
513f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
514f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    """
515f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
516d655ab28fdf7c940d3f79f8f287954d7f76e0977Argyrios Kyrtzidis    # Parse extensions config params (ignore the order)
517f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    configs = dict(configs)
518f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    pos = ext_name.find("(") # find the first "("
5193fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks    if pos > 0:
5206a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks        ext_args = ext_name[pos+1:-1]
521a5937bbfd19e61d651a58b0f0ffeef68457902a5Ted Kremenek        ext_name = ext_name[:pos]
52275d03cffe20b5c945ef04eba208efb0437339997Ted Kremenek        pairs = [x.split("=") for x in ext_args.split(",")]
5233fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks        configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
524f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek
5253fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks    # Setup the module names
5263bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks    ext_module = 'markdown.extensions'
527f6eafcca7734274d277afa121f2c4fb025a54218Ted Kremenek    module_name_new_style = '.'.join([ext_module, ext_name])
528f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    module_name_old_style = '_'.join(['mdx', ext_name])
5296f42b62b6194f53bcbc349f5d17388e1936535d7Dylan Noblesmith
5303fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks    # Try loading the extention first from one place, then another
531f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    try: # New style (markdown.extensons.<extension>)
532c5619d901a68dc27a9e310a6a831f03efebcd950Zhongxing Xu        module = __import__(module_name_new_style, {}, {}, [ext_module])
533f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    except ImportError:
5341eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        try: # Old style (mdx.<extension>)
535b35a74a40c1d2656a25e560e773ed48bdf49f9c0Ted Kremenek            module = __import__(module_name_old_style)
536d200187bd27f9ad68699693a6e57f9ee3ff260faJordy Rose        except ImportError:
5373fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks           message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
5381eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump               % (ext_name, module_name_new_style, module_name_old_style))
539f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek           # Return None so we don't try to initiate none-existant extension
540f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek           return None
541c5619d901a68dc27a9e310a6a831f03efebcd950Zhongxing Xu
5423df6421150271266b5a90fd4c6bfa6566c38c036Ted Kremenek    # If the module is loaded successfully, we expect it to define a
54334d7734b6ed1d9c0f647405e065251eb67f42badTed Kremenek    # function called makeExtension()
5443fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks    try:
5453fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks        return module.makeExtension(configs.items())
5461eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    except AttributeError:
5473df6421150271266b5a90fd4c6bfa6566c38c036Ted Kremenek        message(CRITICAL, "Failed to initiate extension '%s'" % ext_name)
5483df6421150271266b5a90fd4c6bfa6566c38c036Ted Kremenek
549bc46f345838b1c0d420dbd3655c94f5f360fb5b8Ted Kremenek
550bc46f345838b1c0d420dbd3655c94f5f360fb5b8Ted Kremenekdef load_extensions(ext_names):
5516a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks    """Loads multiple extensions"""
5526a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks    extensions = []
55317a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose    for ext_name in ext_names:
5544e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie        extension = load_extension(ext_name)
55517a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose        if extension:
5563fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks            extensions.append(extension)
55717a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose    return extensions
55817a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose
55917a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose
5603fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks"""
56117a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy RoseEXPORTED FUNCTIONS
56217a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose=============================================================================
56317a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose
5643fd5f370a28552976c52e76c3035d79012d78ddaAnna ZaksThose are the two functions we really mean to export: markdown() and
5653fd5f370a28552976c52e76c3035d79012d78ddaAnna ZaksmarkdownFromFile().
56617a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose"""
56717a38e2636a8b1ce473fc6504c4b16cb09db29f4Jordy Rose
568b35a74a40c1d2656a25e560e773ed48bdf49f9c0Ted Kremenekdef markdown(text,
569b35a74a40c1d2656a25e560e773ed48bdf49f9c0Ted Kremenek             extensions = [],
570f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek             safe_mode = False,
571f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek             output_format = DEFAULT_OUTPUT_FORMAT):
572f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    """Convert a markdown string to HTML and return HTML as a unicode string.
573f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek
5749ef6537a894c33003359b1f9b9676e9178e028b7Ted Kremenek    This is a shortcut function for `Markdown` class to cover the most
57508b86531ade68727c56918f162816075b87c864aJordy Rose    basic use case.  It initializes an instance of Markdown, loads the
57608b86531ade68727c56918f162816075b87c864aJordy Rose    necessary extensions and runs the parser on the given text.
57708b86531ade68727c56918f162816075b87c864aJordy Rose
57808b86531ade68727c56918f162816075b87c864aJordy Rose    Keyword arguments:
579efceabd2380f49306bc0229583458e93b062094bDaniel Dunbar
580be1fe1eb12a1cb91c8e3a9fcc2db4dfe989def6cTed Kremenek    * text: Markdown formatted text as Unicode or ASCII string.
58108b86531ade68727c56918f162816075b87c864aJordy Rose    * extensions: A list of extensions or extension names (may contain config args).
582f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    * safe_mode: Disallow raw html.  One of "remove", "replace" or "escape".
583f4381fddf152a63e1ac97185293c47ec0ac2f1a6Ted Kremenek    * output_format: Format of output. Supported formats are:
584f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek        * "xhtml1": Outputs XHTML 1.x. Default.
585f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek        * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
586f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek        * "html4": Outputs HTML 4
587f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek        * "html": Outputs latest supported version of HTML (currently HTML 4).
588f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek        Note that it is suggested that the more specific formats ("xhtml1"
5891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        and "html4") be used as "xhtml" or "html" may change in the future
590c5619d901a68dc27a9e310a6a831f03efebcd950Zhongxing Xu        if it makes sense at that time.
5916f42b62b6194f53bcbc349f5d17388e1936535d7Dylan Noblesmith
592710ad9343f32b33b336369b20edad1a21a0b3299Ted Kremenek    Returns: An HTML document as a string.
593f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek
594f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    """
595f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    md = Markdown(extensions=load_extensions(extensions),
596f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek                  safe_mode=safe_mode,
5971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                  output_format=output_format)
598f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    return md.convert(text)
5999c378f705405d37f49795d5e915989de774fe11fTed Kremenek
60056b98719b0dbebb33cb228afa888c47156be2381Ted Kremenek
6011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpdef markdownFromFile(input = None,
602710ad9343f32b33b336369b20edad1a21a0b3299Ted Kremenek                     output = None,
6031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                     extensions = [],
6049c378f705405d37f49795d5e915989de774fe11fTed Kremenek                     encoding = None,
605f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek                     safe_mode = False,
6061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                     output_format = DEFAULT_OUTPUT_FORMAT):
607f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    """Read markdown code from a file and write it to a file or a stream."""
608f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek    md = Markdown(extensions=load_extensions(extensions),
609c5619d901a68dc27a9e310a6a831f03efebcd950Zhongxing Xu                  safe_mode=safe_mode,
610f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek                  output_format=output_format)
6111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    md.convertFile(input, output, encoding)
612710ad9343f32b33b336369b20edad1a21a0b3299Ted Kremenek
613f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek
614f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek
615f8ce6991f41d55b6e8526b7a7919771428e2b181Ted Kremenek