1# markdown is released under the BSD license
2# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4# Copyright 2004 Manfred Stienstra (the original version)
5#
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are met:
10#
11# *   Redistributions of source code must retain the above copyright
12#     notice, this list of conditions and the following disclaimer.
13# *   Redistributions in binary form must reproduce the above copyright
14#     notice, this list of conditions and the following disclaimer in the
15#     documentation and/or other materials provided with the distribution.
16# *   Neither the name of the <organization> nor the
17#     names of its contributors may be used to endorse or promote products
18#     derived from this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30# POSSIBILITY OF SUCH DAMAGE.
31
32
33"""
34Attribute List Extension for Python-Markdown
35============================================
36
37Adds attribute list syntax. Inspired by
38[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
39feature of the same name.
40
41Copyright 2011 [Waylan Limberg](http://achinghead.com/).
42
43Contact: markdown@freewisdom.org
44
45License: BSD (see ../LICENSE.md for details)
46
47Dependencies:
48* [Python 2.4+](http://python.org)
49* [Markdown 2.1+](http://packages.python.org/Markdown/)
50
51"""
52
53from __future__ import absolute_import
54from __future__ import unicode_literals
55from . import Extension
56from ..treeprocessors import Treeprocessor
57from ..util import isBlockLevel
58import re
59
# Prefer the Scanner class exposed by the re module.
Scanner = getattr(re, 'Scanner', None)
if Scanner is None:
    # re has no Scanner attribute (must be on Python 2.4): fall back to sre
    from sre import Scanner
65
def _handle_double_quote(s, t):
    """ Scanner action for a `key="value"` token.

    `s` is supplied by the scanner and is not used; `t` is the matched text.
    Returns a `(key, value)` tuple with the double quotes stripped from
    both ends of the value.
    """
    # Split on the first '=' only, so that a quoted value which itself
    # contains '=' (e.g. href="page?a=b") does not break the unpacking.
    k, v = t.split('=', 1)
    return k, v.strip('"')
69
def _handle_single_quote(s, t):
    """ Scanner action for a `key='value'` token.

    `s` is supplied by the scanner and is not used; `t` is the matched text.
    Returns a `(key, value)` tuple with the single quotes stripped from
    both ends of the value.
    """
    # Split on the first '=' only, so that a quoted value which itself
    # contains '=' (e.g. href='page?a=b') does not break the unpacking.
    k, v = t.split('=', 1)
    return k, v.strip("'")
73
def _handle_key_value(s, t):
    """ Scanner action for an unquoted `key=value` token.

    `s` is supplied by the scanner and is not used; `t` is the matched text.
    Returns a two-item list `[key, value]`.
    """
    # Limit the split to the first '=' so the result always has exactly two
    # items, even when the value contains further '=' characters; callers
    # unpack each result as a (key, value) pair.
    return t.split('=', 1)
76
def _handle_word(s, t):
    """ Scanner action for a bare word token.

    `s` is supplied by the scanner and is not used; `t` is the matched text.
    A leading '.' yields `('.', rest)`, a leading '#' yields `('id', rest)`,
    and any other word yields `(word, word)`.
    """
    shorthand_keys = {'.': '.', '#': 'id'}
    key = shorthand_keys.get(t[:1])
    if key is None:
        # plain word: it serves as both the key and the value
        return t, t
    return key, t[1:]
83
# Token rules for an attribute list, as (pattern, action) pairs. The quoted
# and key=value forms are listed ahead of the bare-word rule, whose pattern
# would otherwise also match them; single spaces have no action.
_attr_rules = [
    (r'[^ ]+=".*?"', _handle_double_quote),
    (r"[^ ]+='.*?'", _handle_single_quote),
    (r'[^ ]+=[^ ]*', _handle_key_value),
    (r'[^ ]+', _handle_word),
    (r' ', None),
]
_scanner = Scanner(_attr_rules)
91
def get_attrs(str):
    """ Scan an attribute list string and return the list of parsed attributes.

    Only the first element of the scanner's result is returned; the second
    element (presumably the text the scanner could not match) is ignored.
    """
    parsed, _ignored = _scanner.scan(str)
    return parsed
95
def isheader(elem):
    """ Return True if the tag of `elem` is one of 'h1' through 'h6'. """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
98
class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that turns `{: ... }` attribute lists into attributes.

    For every element in the tree an attribute list is looked for in the
    element's text or tail; when one is found it is removed from the text
    and its contents are set as attributes on the element.
    """

    # An attribute list: `{...}` or `{:...}`. Group 1 is the text between
    # the braces.
    BASE_RE = r'\{\:?([^\}]*)\}'
    # Headers: attribute list at the end of the text.
    HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE)
    # Other block-level elements: attribute list alone on the last line.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Inline elements: attribute list at the very start of the tail.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Matches runs of characters that are not allowed in an attribute name.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d'
                         r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef'
                         r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """ Apply attribute lists to every element iterated from `doc`.

        Elements are modified in place; nothing is returned.
        """
        # Element.getiterator() was removed from xml.etree.ElementTree in
        # Python 3.9. Use iter() when the element offers it and fall back
        # to getiterator() for older element implementations.
        walk = getattr(doc, 'iter', None) or doc.getiterator
        for elem in walk():
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem):
                    # header: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children, or the last child has no tail.
                    # Get from text.
                    m = RE.search(elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """ Parse the attribute list text `attrs` and set the result on `elem`.

        A '.' key appends its value to the element's 'class' attribute
        (space separated); any other key is sanitized and set directly,
        replacing an existing attribute of the same name.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Return `name` with every run of characters matched by NAME_RE
        replaced by a single '_'.

        The permitted characters follow the XML name character ranges
        (note that NAME_RE also permits ':').
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName
        """
        return self.NAME_RE.sub('_', name)
164
165
class AttrListExtension(Extension):
    """ Extension that installs the attribute list tree processor. """

    def extendMarkdown(self, md, md_globals):
        """ Add an AttrListTreeprocessor to `md.treeprocessors`.

        The processor is registered under the name 'attr_list' with the
        location '>prettify'. `md_globals` is not used.
        """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.add('attr_list', processor, '>prettify')
169
170
def makeExtension(configs=None):
    """ Create and return an AttrListExtension instance.

    `configs` is handed to AttrListExtension unchanged. When it is omitted
    (or None) a new empty dict is passed instead.
    """
    # A `{}` default would be evaluated once and then shared by every call
    # (and every extension instance created from it); build a fresh dict
    # per call. Only None is replaced, so other empty values pass through.
    if configs is None:
        configs = {}
    return AttrListExtension(configs=configs)
173