# markdown is released under the BSD license
# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of the <organization> nor the
#   names of its contributors may be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


"""
Attribute List Extension for Python-Markdown
============================================

Adds attribute list syntax. Inspired by
[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

Copyright 2011 [Waylan Limberg](http://achinghead.com/).

Contact: markdown@freewisdom.org

License: BSD (see ../LICENSE.md for details)

Dependencies:
* [Python 2.4+](http://python.org)
* [Markdown 2.1+](http://packages.python.org/Markdown/)

"""

from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import isBlockLevel
import re

try:
    Scanner = re.Scanner
except AttributeError:
    # must be on Python 2.4
    from sre import Scanner


def _handle_double_quote(s, t):
    """ Scanner callback for `key="value"`: return (key, value sans quotes). """
    # Split on the first '=' only, so values that themselves contain '='
    # (e.g. URLs with query strings) do not break the two-item unpacking.
    k, v = t.split('=', 1)
    return k, v.strip('"')


def _handle_single_quote(s, t):
    """ Scanner callback for `key='value'`: return (key, value sans quotes). """
    # Split on the first '=' only; see _handle_double_quote.
    k, v = t.split('=', 1)
    return k, v.strip("'")


def _handle_key_value(s, t):
    """ Scanner callback for unquoted `key=value`: return [key, value]. """
    # maxsplit=1 guarantees exactly two items, which callers unpack as (k, v).
    return t.split('=', 1)


def _handle_word(s, t):
    """
    Scanner callback for a bare word.

    `.name` yields ('.', name) (a class marker), `#name` yields ('id', name),
    and any other word yields (word, word).
    """
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t


# Patterns are listed from most to least specific; single spaces between
# tokens are matched and discarded (action None).
_scanner = Scanner([
    (r'[^ ]+=".*?"', _handle_double_quote),
    (r"[^ ]+='.*?'", _handle_single_quote),
    (r'[^ ]+=[^ ]*', _handle_key_value),
    (r'[^ ]+', _handle_word),
    (r' ', None)
])


def get_attrs(str):
    """
    Parse attribute list and return a list of attribute tuples.

    Only the token list produced by the scanner is returned; any unscanned
    remainder reported by `Scanner.scan` is dropped.
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept as-is
    # so that existing keyword callers keep working.
    return _scanner.scan(str)[0]


def isheader(elem):
    """ Return True if `elem` is an h1-h6 element. """
    return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']


class AttrListTreeprocessor(Treeprocessor):
    """ Treeprocessor that moves `{: ... }` attribute lists onto elements. """

    # `{...}` or `{:...}`; group 1 captures the raw attribute list.
    BASE_RE = r'\{\:?([^\}]*)\}'
    # Headers: attribute list at the very end of the text.
    HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE)
    # Other block elements: attribute list alone on the last line.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Inline elements: attribute list immediately at the start of the tail.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Matches runs of characters that are NOT in the allowed name set.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d'
                         r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef'
                         r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """
        Walk every element of `doc`, apply any attribute list found in its
        text/tail, and strip the attribute list markup from that text.

        The tree is modified in place; nothing is returned.
        """
        # Element.getiterator() no longer exists on Python 3.9+, so prefer
        # iter() and only fall back for very old ElementTree versions.
        # Only text, tail and attributes are changed below (never the tree
        # structure), so iterating lazily is safe.
        try:
            elements = doc.iter()
        except AttributeError:
            elements = doc.getiterator()

        for elem in elements:
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem):
                    # header: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """
        Assign attrs to element.

        `attrs` is the raw attribute-list string. Class markers ('.') are
        appended to any existing `class` value; every other key is set as an
        attribute after its name has been passed through `sanitize_name`.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Each run of characters matched by NAME_RE is replaced with a single
        underscore.
        """
        # NOTE(review): NAME_RE's character class lists ':' as allowed, so
        # colons are kept rather than replaced -- confirm this is intended.
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Markdown extension that registers AttrListTreeprocessor. """

    def extendMarkdown(self, md, md_globals):
        """ Add the treeprocessor as 'attr_list' at location '>prettify'. """
        md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify')


def makeExtension(configs=None):
    """
    Return an AttrListExtension instance built from `configs`.

    `configs` defaults to a fresh empty dict per call, so no mutable default
    is shared between calls.
    """
    if configs is None:
        configs = {}
    return AttrListExtension(configs=configs)