16516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru#!/usr/bin/python
26516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
36516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru"""
46516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruHeaderID Extension for Python-Markdown
56516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru======================================
66516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
76516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruAdds ability to set HTML IDs for headers.
86516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
96516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruBasic usage:
106516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
116516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> import markdown
126516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> text = "# Some Header # {#some_id}"
136516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md = markdown.markdown(text, ['headerid'])
146516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md
156516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    u'<h1 id="some_id">Some Header</h1>'
166516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
176516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruAll header IDs are unique:
186516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
196516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> text = '''
206516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... #Header
216516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... #Another Header {#header}
226516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... #Third Header {#header}'''
236516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md = markdown.markdown(text, ['headerid'])
246516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md
256516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    u'<h1 id="header">Header</h1>\\n<h1 id="header_1">Another Header</h1>\\n<h1 id="header_2">Third Header</h1>'
266516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
276516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruTo fit within a html template's hierarchy, set the header base level:
286516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
296516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> text = '''
306516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... #Some Header
316516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... ## Next Level'''
326516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md = markdown.markdown(text, ['headerid(level=3)'])
336516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md
346516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    u'<h3 id="some_header">Some Header</h3>\\n<h4 id="next_level">Next Level</h4>'
356516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
366516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruTurn off auto generated IDs:
376516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
386516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> text = '''
396516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... # Some Header
406516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... # Header with ID # { #foo }'''
416516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md = markdown.markdown(text, ['headerid(forceid=False)'])
426516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md
436516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    u'<h1>Some Header</h1>\\n<h1 id="foo">Header with ID</h1>'
446516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
456516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruUse with MetaData extension:
466516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
476516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> text = '''header_level: 2
486516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... header_forceid: Off
496516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ...
506516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... # A Header'''
516516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md = markdown.markdown(text, ['headerid', 'meta'])
526516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> md
536516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    u'<h2>A Header</h2>'
546516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
556516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruCopyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
566516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
Project website: <http://www.freewisdom.org/projects/python-markdown/HeaderId>
586516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruContact: markdown@freewisdom.org
596516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
606516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruLicense: BSD (see ../docs/LICENSE for details)
616516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
626516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruDependencies:
636516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru* [Python 2.3+](http://python.org)
646516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
656516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
666516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru"""
676516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
686516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queruimport markdown
696516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Querufrom markdown import etree
706516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queruimport re
716516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Querufrom string import ascii_lowercase, digits, punctuation
726516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
# Characters that are copied unchanged into an auto-generated header ID.
ID_CHARS = ''.join([ascii_lowercase, digits, '-_'])
# Matches an ID that already ends in a numeric "_N" suffix:
# group(1) is the base ID, group(2) is the counter digits.
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
756516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
766516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class HeaderIdProcessor(markdown.blockprocessors.BlockProcessor):
    """ Replacement BlockProcessor for Header IDs.

    Parses hash-style headers (``# Header # {#some_id}``), creates the
    matching ``<hN>`` element and gives it an ``id`` attribute: either the
    one written explicitly in braces or, when ``forceid`` is on, one derived
    from the header text.

    The owning extension assigns the ``md`` (Markdown instance) and
    ``config`` (extension config dict) attributes after construction; both
    are read by ``_get_meta``.
    """

    # Detect a header at start of any line in block
    RE = re.compile(r"""(^|\n)
                        (?P<level>\#{1,6})  # group('level') = string of hashes
                        (?P<header>.*?)     # group('header') = Header text
                        \#*                 # optional closing hashes
                        (?:[ \t]*\{[ \t]*\#(?P<id>[-_:a-zA-Z0-9]+)[ \t]*\})?
                        (\n|$)              #  ^^ group('id') = id attribute
                     """,
                     re.VERBOSE)

    # Class-level default kept for backward compatibility with code that
    # reads ``HeaderIdProcessor.IDs``; each instance gets its own list in
    # ``__init__`` so IDs are never shared between processors.
    IDs = []

    def __init__(self, *args, **kwargs):
        """ Initialise the base processor and a per-instance list of used IDs. """
        markdown.blockprocessors.BlockProcessor.__init__(self, *args, **kwargs)
        self.IDs = []

    def test(self, parent, block):
        """ Return True if `block` contains a hash header on any line. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Consume the first block and append its header element to `parent`.

        Lines before the header are parsed recursively as their own block;
        lines after it are pushed back onto `blocks` for later processing.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if not m:
            # This should never happen, but just in case...
            # `message` and `CRITICAL` are not imported into this module, so
            # they are reached through the markdown package (assumes the
            # package exposes both, as Markdown 2.0 does).
            markdown.message(markdown.CRITICAL, "We've got a problem header!")
            return

        before = block[:m.start()]  # All lines before header
        after = block[m.end():]     # All lines after header
        if before:
            # As the header was not the first line of the block and the
            # lines before the header must be parsed first,
            # recursively parse these lines as a block.
            self.parser.parseBlocks(parent, [before])

        # Create header using named groups from RE
        start_level, force_id = self._get_meta()
        level = len(m.group('level')) + start_level
        # Keep the tag within the valid <h1>..<h6> range, whatever base
        # level was configured.
        level = max(1, min(level, 6))
        h = markdown.etree.SubElement(parent, 'h%d' % level)
        header_text = m.group('header').strip()
        h.text = header_text
        if m.group('id'):
            # An explicit {#id} always wins, but is still made unique.
            h.set('id', self._unique_id(m.group('id')))
        elif force_id:
            h.set('id', self._create_id(header_text))

        if after:
            # Insert remaining lines as first block for future parsing.
            blocks.insert(0, after)

    def _get_meta(self):
        """ Return meta data supported by this ext as a tuple.

        Returns ``(level_offset, force_id)``: the number added to the count
        of hashes to get the header level (configured base level minus one)
        and whether headers without an explicit ID get a generated one.
        Values from the document's ``Meta`` data (``header_level``,
        ``header_forceid``), when present, override the extension config.
        """
        level = int(self.config['level'][0]) - 1
        force = self._str2bool(self.config['forceid'][0])
        if hasattr(self.md, 'Meta'):
            meta = self.md.Meta
            if 'header_level' in meta:
                level = int(meta['header_level'][0]) - 1
            if 'header_forceid' in meta:
                force = self._str2bool(meta['header_forceid'][0])
        return level, force

    def _str2bool(self, s, default=False):
        """ Convert a string to a boolean value.

        Recognises common true/false spellings case-insensitively and
        returns `default` for anything else.
        """
        lowered = str(s).lower()
        if lowered in ['0', 'f', 'false', 'off', 'no', 'n']:
            return False
        if lowered in ['1', 't', 'true', 'on', 'yes', 'y']:
            return True
        return default

    def _unique_id(self, id):
        """ Ensure ID is unique. Append '_1', '_2'... if not.

        The returned ID is recorded in ``self.IDs`` so later headers cannot
        reuse it.
        """
        while id in self.IDs:
            m = IDCOUNT_RE.match(id)
            if m:
                # Already has a numeric suffix: bump the counter.
                id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
            else:
                id = '%s_%d' % (id, 1)
        self.IDs.append(id)
        return id

    def _create_id(self, header):
        """ Return a unique ID derived from Header text.

        The text is lowercased and spaces become underscores. Characters in
        ID_CHARS are kept, punctuation is dropped, and any other character
        is replaced by '+'.
        """
        chars = []
        for c in header.lower().replace(' ', '_'):
            if c in ID_CHARS:
                chars.append(c)
            elif c not in punctuation:
                chars.append('+')
        return self._unique_id(''.join(chars))
1646516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
1656516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class HeaderIdExtension(markdown.Extension):
    """ Markdown extension that swaps in HeaderIdProcessor for hash headers. """

    def __init__(self, configs=None):
        """ Set the default config and apply any user overrides.

        `configs` is an iterable of ``(key, value)`` pairs; ``None`` is
        treated as "no overrides".
        """
        # set defaults
        self.config = {
                'level' : ['1', 'Base level for headers.'],
                'forceid' : ['True', 'Force all headers to have an id.']
            }

        if configs is None:
            configs = []
        for key, value in configs:
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        """ Register with `md` and install the header processor. """
        md.registerExtension(self)
        self.processor = HeaderIdProcessor(md.parser)
        # The processor reads both attributes when resolving level/forceid.
        self.processor.md = md
        self.processor.config = self.config
        # Replace existing hashheader in place.
        md.parser.blockprocessors['hashheader'] = self.processor

    def reset(self):
        """ Forget all IDs used so far, so the next document starts fresh. """
        self.processor.IDs = []
1876516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
1886516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
def makeExtension(configs=None):
    """ Return a HeaderIdExtension built from `configs`.

    `configs` is an iterable of ``(key, value)`` pairs. The ``None`` default
    is turned into an empty list because the extension iterates over it.
    """
    if configs is None:
        configs = []
    return HeaderIdExtension(configs=configs)
1916516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
if __name__ == "__main__":
    # Run the usage examples in the module docstring as doctests.
    from doctest import testmod
    testmod()
1956516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
196