#!/usr/bin/python

"""
HeaderID Extension for Python-Markdown
======================================

Adds ability to set HTML IDs for headers.

Basic usage:

    >>> import markdown
    >>> text = "# Some Header # {#some_id}"
    >>> md = markdown.markdown(text, ['headerid'])
    >>> md
    u'<h1 id="some_id">Some Header</h1>'

All header IDs are unique:

    >>> text = '''
    ... #Header
    ... #Another Header {#header}
    ... #Third Header {#header}'''
    >>> md = markdown.markdown(text, ['headerid'])
    >>> md
    u'<h1 id="header">Header</h1>\\n<h1 id="header_1">Another Header</h1>\\n<h1 id="header_2">Third Header</h1>'

To fit within a html template's hierarchy, set the header base level:

    >>> text = '''
    ... #Some Header
    ... ## Next Level'''
    >>> md = markdown.markdown(text, ['headerid(level=3)'])
    >>> md
    u'<h3 id="some_header">Some Header</h3>\\n<h4 id="next_level">Next Level</h4>'

Turn off auto generated IDs:

    >>> text = '''
    ... # Some Header
    ... # Header with ID # { #foo }'''
    >>> md = markdown.markdown(text, ['headerid(forceid=False)'])
    >>> md
    u'<h1>Some Header</h1>\\n<h1 id="foo">Header with ID</h1>'

Use with MetaData extension:

    >>> text = '''header_level: 2
    ... header_forceid: Off
    ...
    ... # A Header'''
    >>> md = markdown.markdown(text, ['headerid', 'meta'])
    >>> md
    u'<h2>A Header</h2>'

Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).

Project website: <http://www.freewisdom.org/project/python-markdown/HeaderId>
Contact: markdown@freewisdom.org

License: BSD (see ../docs/LICENSE for details)

Dependencies:
* [Python 2.3+](http://python.org)
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)

"""

import markdown
from markdown import etree
import re
from string import ascii_lowercase, digits, punctuation

# Characters copied verbatim into an auto-generated header ID.
ID_CHARS = ascii_lowercase + digits + '-_'
# Matches an ID that already ends in a numeric "_N" de-duplication suffix.
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')


class HeaderIdProcessor(markdown.blockprocessors.BlockProcessor):
    """ Replacement BlockProcessor for Header IDs. """

    # Detect a header at start of any line in block
    RE = re.compile(r"""(^|\n)
                        (?P<level>\#{1,6})  # group('level') = string of hashes
                        (?P<header>.*?)     # group('header') = Header text
                        \#*                 # optional closing hashes
                        (?:[ \t]*\{[ \t]*\#(?P<id>[-_:a-zA-Z0-9]+)[ \t]*\})?
                        (\n|$)              #  ^^ group('id') = id attribute
                     """,
                     re.VERBOSE)

    # IDs handed out so far. This class-level list is only a fallback;
    # HeaderIdExtension assigns a fresh per-instance list so that separate
    # processors do not share (and pollute) one another's ID history.
    IDs = []

    def test(self, parent, block):
        """ Return True if any line of `block` looks like a hash header. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Consume the first block and append an <hN> element to `parent`.

        Text before the header line is parsed immediately as its own block;
        text after it is pushed back onto `blocks` for later parsing.

        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()]  # All lines before header
            after = block[m.end():]     # All lines after header
            if before:
                # As the header was not the first line of the block and the
                # lines before the header must be parsed first,
                # recursively parse these lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE
            start_level, force_id = self._get_meta()
            level = len(m.group('level')) + start_level
            if level > 6:
                # HTML defines nothing deeper than <h6>.
                level = 6
            h = markdown.etree.SubElement(parent, 'h%d' % level)
            h.text = m.group('header').strip()
            if m.group('id'):
                # An explicit {#id} always wins over an auto-generated one.
                h.set('id', self._unique_id(m.group('id')))
            elif force_id:
                h.set('id', self._create_id(m.group('header').strip()))
            if after:
                # Insert remaining lines as first block for future parsing.
                blocks.insert(0, after)
        else:
            # This should never happen, but just in case...
            # (The names must be qualified: this module never imports bare
            # `message` / `CRITICAL`, so the unqualified call raised
            # NameError instead of reporting the problem.)
            markdown.message(markdown.CRITICAL, "We've got a problem header!")

    def _get_meta(self):
        """ Return meta data supported by this ext as a tuple.

        Returns `(level_offset, force_id)`. Values come from the extension
        config and are overridden by the `header_level` / `header_forceid`
        keys of `self.md.Meta` when that attribute exists.

        """
        level = int(self.config['level'][0]) - 1
        force = self._str2bool(self.config['forceid'][0])
        if hasattr(self.md, 'Meta'):
            meta = self.md.Meta
            if 'header_level' in meta:
                level = int(meta['header_level'][0]) - 1
            if 'header_forceid' in meta:
                force = self._str2bool(meta['header_forceid'][0])
        return level, force

    def _str2bool(self, s, default=False):
        """ Convert a string to a boolean value.

        Recognised spellings are matched case-insensitively; anything else
        yields `default`.

        """
        lowered = str(s).lower()
        if lowered in ('0', 'f', 'false', 'off', 'no', 'n'):
            return False
        if lowered in ('1', 't', 'true', 'on', 'yes', 'y'):
            return True
        return default

    def _unique_id(self, id):
        """ Ensure ID is unique. Append '_1', '_2'... if not.

        The returned ID is recorded in `self.IDs` so later headers cannot
        reuse it.

        """
        while id in self.IDs:
            m = IDCOUNT_RE.match(id)
            if m:
                # Already carries a numeric suffix: bump it.
                id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
            else:
                id = '%s_%d' % (id, 1)
        self.IDs.append(id)
        return id

    def _create_id(self, header):
        """ Return a unique ID derived from the Header text.

        The text is lowercased and spaces become underscores. Characters in
        ID_CHARS are kept, ASCII punctuation is dropped, and every other
        character is replaced by '+'.

        """
        chars = []
        for c in header.lower().replace(' ', '_'):
            if c in ID_CHARS:
                chars.append(c)
            elif c not in punctuation:
                chars.append('+')
        return self._unique_id(''.join(chars))


class HeaderIdExtension (markdown.Extension):
    """ Markdown extension that swaps in HeaderIdProcessor for hash headers. """

    def __init__(self, configs):
        """ Store default settings, then apply `configs`.

        `configs` is an iterable of `(key, value)` pairs. `None` (the default
        passed through by `makeExtension`) is treated as "no overrides".

        """
        # set defaults
        self.config = {
                'level' : ['1', 'Base level for headers.'],
                'forceid' : ['True', 'Force all headers to have an id.']
            }

        for key, value in configs or []:
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        """ Install the header processor on the `md` instance. """
        # NOTE(review): registering presumably lets Markdown call reset()
        # on this extension - confirm against the Markdown version in use.
        md.registerExtension(self)
        self.processor = HeaderIdProcessor(md.parser)
        self.processor.md = md
        self.processor.config = self.config
        # Give this processor its own ID list so the class-level default is
        # never mutated and shared between Markdown instances.
        self.processor.IDs = []
        # Replace existing hasheader in place.
        md.parser.blockprocessors['hashheader'] = self.processor

    def reset(self):
        """ Forget all IDs issued so far. """
        self.processor.IDs = []


def makeExtension(configs=None):
    """ Return a HeaderIdExtension built from the `configs` pairs. """
    return HeaderIdExtension(configs=configs)

if __name__ == "__main__":
    import doctest
    doctest.testmod()