1#!/usr/bin/python
2
3"""
4CodeHilite Extension for Python-Markdown
5========================================
6
7Adds code/syntax highlighting to standard Python-Markdown code blocks.
8
9Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
10
11Project website: <http://www.freewisdom.org/project/python-markdown/CodeHilite>
12Contact: markdown@freewisdom.org
13
14License: BSD (see ../docs/LICENSE for details)
15
16Dependencies:
17* [Python 2.3+](http://python.org/)
18* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
19* [Pygments](http://pygments.org/)
20
21"""
22
23import markdown
24
25# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
26
# Use the tab width configured in the markdown module when it exposes one;
# otherwise fall back to four columns.
TAB_LENGTH = getattr(markdown, 'TAB_LENGTH', 4)
31
32
33# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    """
    Determine language of source code, and pass it into the pygments hilighter.

    Basic Usage:
        >>> code = CodeHilite(src = 'some text')
        >>> html = code.hilite()

    * src: Source string. It is stripped and split on newlines, so it must
      behave like a string.

    * linenos: (Boolean) Turn line numbering 'on' or 'off' (off by default).

    * css_class: Set class name of wrapper div ('codehilite' by default).

    Low Level Usage:
        >>> code = CodeHilite()
        >>> code.src = 'some text' # The source string.
        >>> code.linenos = True  # True or False; Turns line numbering on or off.
        >>> html = code.hilite()

    """

    def __init__(self, src=None, linenos=False, css_class="codehilite"):
        self.src = src
        # Filled in by _getLang() from the first line of the source, if any.
        self.lang = None
        self.linenos = linenos
        self.css_class = css_class

    def hilite(self):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter
        with optional line numbers. The output should then be styled with css
        to your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        If Pygments cannot be imported, the source is only html-escaped and
        wrapped in a <div>/<pre> pair (numbered with an <ol> when line
        numbers are on).

        Note that this updates self.src, self.lang and possibly self.linenos
        as a side effect of examining the first line (see _getLang).

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        self._getLang()

        try:
            from pygments import highlight
            from pygments.lexers import get_lexer_by_name, guess_lexer, \
                                        TextLexer
            from pygments.formatters import HtmlFormatter
        except ImportError:
            # Pygments is unavailable: just escape and pass through.
            txt = self._escape(self.src)
            if self.linenos:
                return self._number(txt)
            return '<div class="%s"><pre>%s</pre></div>\n' % \
                    (self.css_class, txt)

        # Prefer the declared language, then a guess from the content, and
        # finally plain text when neither lookup succeeds.
        try:
            lexer = get_lexer_by_name(self.lang)
        except ValueError:
            try:
                lexer = guess_lexer(self.src)
            except ValueError:
                lexer = TextLexer()
        formatter = HtmlFormatter(linenos=self.linenos,
                                  cssclass=self.css_class)
        return highlight(self.src, lexer, formatter)

    def _escape(self, txt):
        """ basic html escaping """
        # '&' must be replaced first so the entities added below are not
        # escaped a second time.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt

    def _number(self, txt):
        """
        Use <ol> for line numbering.

        txt is expected to be already html-escaped. Returns the lines wrapped
        as list items inside a <div> carrying self.css_class, so the numbered
        fallback uses the same wrapper class as the un-numbered one.

        """
        # Fix Whitespace: expand tabs, then alternate non-breaking and plain
        # spaces so runs of spaces are written out explicitly in the markup.
        txt = txt.replace('\t', ' ' * TAB_LENGTH)
        txt = txt.replace(" " * 4, "&nbsp; &nbsp; ")
        txt = txt.replace(" " * 3, "&nbsp; &nbsp;")
        txt = txt.replace(" " * 2, "&nbsp; ")

        # Add line numbers (one list item per source line).
        items = ['\t<li>%s</li>\n' % line for line in txt.splitlines()]
        return '<div class="%s"><pre><ol>\n%s</ol></pre></div>\n' % \
                (self.css_class, ''.join(items))

    def _getLang(self):
        """
        Determines language of a code block from shebang lines and whether said
        line should be removed or left in place. If the shebang line contains a
        path (even a single /) then it is assumed to be a real shebang line and
        left alone. However, if no path is given (e.g.: #!python or :::python)
        then it is assumed to be a mock shebang for language identification of
        a code fragment and removed from the code block prior to processing for
        code highlighting. When a shebang (e.g.: #!python) is found, line
        numbering is turned on. When colons are found in place of a shebang
        (e.g.: :::python), line numbering is left in the current state - off
        by default.

        The marker must be at the very start of the first line; a first line
        that merely contains '::' or '#!' somewhere (for example C++ scope
        operators) is ordinary code and is left untouched.

        Updates self.lang, self.linenos and self.src in place.

        """

        import re

        # split text into lines and pull the first one to examine
        lines = self.src.split("\n")
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:::+)|(?P<shebang>[#]!))   # Shebang or 2 or more colons.
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w+-]*)               # The language
            ''',  re.VERBOSE)
        # Anchor at the start of the line: match(), not search().
        m = c.match(fl)
        if m:
            # The 'lang' group can match the empty string but always exists.
            self.lang = m.group('lang').lower()
            if m.group('path'):
                # path exists - real shebang, restore first line
                lines.insert(0, fl)
            if m.group('shebang'):
                # shebang exists - use line numbers
                self.linenos = True
        else:
            # No match - the first line is ordinary code
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")
174
175
176
177# ------------------ The Markdown Extension -------------------------------
class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """ Hilight source code in code blocks. """

    def run(self, root):
        """
        Highlight every code block and swap it for an htmlStash placeholder.

        A code block is a 'pre' element whose only child is a 'code'
        element. The tree under root is modified in place.

        """
        for pre in root.getiterator('pre'):
            kids = pre.getchildren()
            if len(kids) != 1 or kids[0].tag != 'code':
                # Not a plain <pre><code> pair - leave it alone.
                continue

            hiliter = CodeHilite(kids[0].text,
                                 linenos=self.config['force_linenos'][0],
                                 css_class=self.config['css_class'][0])
            stash = self.markdown.htmlStash.store
            marker = stash(hiliter.hilite(), safe=True)

            # Empty the element, then reuse it as a paragraph holding only
            # the placeholder; the p element will later be removed when
            # the raw html is inserted.
            pre.clear()
            pre.tag = 'p'
            pre.text = marker
198
199
class CodeHiliteExtension(markdown.Extension):
    """ Add source code hilighting to markdown codeblocks. """

    def __init__(self, configs):
        """
        Set the default options, then apply the user supplied ones.

        * configs: Either a sequence of (key, value) pairs or a mapping of
          key to value. None is treated as "no overrides".

        """
        # define default configs
        self.config = {
            'force_linenos' : [False, "Force line numbers - Default: False"],
            'css_class' : ["codehilite",
                           "Set class name for wrapper <div> - Default: codehilite"],
            }

        # Iterating a mapping directly yields only its keys, which would
        # then be unpacked character by character; go through items() so
        # both mappings and sequences of pairs are handled.
        if configs is None:
            configs = []
        elif hasattr(configs, 'items'):
            configs = configs.items()

        # Override defaults with user settings
        for key, value in configs:
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        """ Add HiliteTreeprocessor to Markdown instance. """
        hiliter = HiliteTreeprocessor(md)
        # Share the option table so the processor reads the user's settings.
        hiliter.config = self.config
        md.treeprocessors.add("hilite", hiliter, "_begin")
220
221
def makeExtension(configs=None):
    """
    Build the CodeHilite extension instance.

    * configs: User settings passed on to CodeHiliteExtension; when omitted
      no settings are overridden.

    """
    # A fresh empty container per call instead of a shared mutable default.
    if configs is None:
        configs = []
    return CodeHiliteExtension(configs=configs)
224
225