16516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru'''
26516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruAbbreviation Extension for Python-Markdown
36516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru==========================================
46516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
56516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruThis extension adds abbreviation handling to Python-Markdown.
66516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
76516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruSimple Usage:
86516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
96516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> import markdown
106516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> text = """
116516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... Some text with an ABBR and a REF. Ignore REFERENCE and ref.
126516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ...
136516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... *[ABBR]: Abbreviation
146516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... *[REF]: Abbreviation Reference
156516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    ... """
166516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    >>> markdown.markdown(text, ['abbr'])
176516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru    u'<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore REFERENCE and ref.</p>'
186516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
196516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste QueruCopyright 2007-2008
206516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru* [Waylan Limberg](http://achinghead.com/)
216516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru* [Seemant Kulleen](http://www.kulleen.org/)
226516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
236516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
246516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru'''
256516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
266516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queruimport markdown, re
276516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Querufrom markdown import etree
286516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
# Global Vars
# Matches an abbreviation definition line of the form ``*[ABBR]: Title``:
# a literal ``*``, the abbreviation in square brackets (any characters except
# ``]``), an optional single space, a colon, optional whitespace, and then the
# remainder of the line as the title. It is applied with ``.match`` below, so
# the definition must start at the beginning of the line.
ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)')
316516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class AbbrExtension(markdown.Extension):
    """ Python-Markdown extension that adds abbreviation handling. """

    def extendMarkdown(self, md, md_globals):
        """
        Register the abbreviation preprocessor on the markdown instance.

        The preprocessor is added under the name 'abbr' and placed before
        the 'reference' preprocessor. `md_globals` is not used here.

        """
        abbr_preprocessor = AbbrPreprocessor(md)
        md.preprocessors.add('abbr', abbr_preprocessor, '<reference')
386516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
396516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class AbbrPreprocessor(markdown.preprocessors.Preprocessor):
    """ Abbreviation Preprocessor - parse text for abbr references. """

    def run(self, lines):
        '''
        Find and remove all Abbreviation references from the text.

        Every line that matches ABBR_REF_RE is dropped from the output and
        registered as a new AbbrPattern in the markdown instance's
        inlinePatterns under the key 'abbr-<ABBR>'. A later definition of
        the same abbreviation replaces the earlier one.

        `lines` is an iterable of text lines. Returns a list holding the
        lines that are not abbreviation references, in their original order.

        '''
        new_text = []
        for line in lines:
            m = ABBR_REF_RE.match(line)
            if m is None:
                # Ordinary line: pass it through untouched.
                new_text.append(line)
                continue
            abbr = m.group('abbr').strip()
            title = m.group('title').strip()
            self.markdown.inlinePatterns['abbr-%s' % abbr] = \
                AbbrPattern(self._generate_pattern(abbr), title)
        return new_text

    def _generate_pattern(self, text):
        '''
        Given a string, returns a regex pattern to match that string.

        'HTML' -> r'(?P<abbr>\\bHTML\\b)'

        Note: the text is passed through re.escape so that every character
        is matched literally. Wrapping each character in square brackets is
        not sufficient: '^' would become the broken class '[^]' and a
        backslash would escape the closing bracket.

        '''
        return r'(?P<abbr>\b%s\b)' % re.escape(text)
756516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
766516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class AbbrPattern(markdown.inlinepatterns.Pattern):
    """ Inline pattern that turns a matched abbreviation into an abbr element. """

    def __init__(self, pattern, title):
        """ Store `title` and hand `pattern` to the base Pattern class. """
        markdown.inlinepatterns.Pattern.__init__(self, pattern)
        self.title = title

    def handleMatch(self, m):
        """
        Build the `abbr` element for match `m`.

        The element text is the match's 'abbr' group and its 'title'
        attribute is the title stored on this pattern.

        """
        element = etree.Element('abbr')
        element.set('title', self.title)
        element.text = m.group('abbr')
        return element
896516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
def makeExtension(configs=None):
    """ Create and return an AbbrExtension built from `configs`. """
    extension = AbbrExtension(configs=configs)
    return extension
926516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
# When executed directly as a script, run the doctests embedded in this
# module's docstrings.
if __name__ == "__main__":
    from doctest import testmod
    testmod()
96