1'''
2Abbreviation Extension for Python-Markdown
3==========================================
4
5This extension adds abbreviation handling to Python-Markdown.
6
7Simple Usage:
8
9    >>> import markdown
10    >>> text = """
11    ... Some text with an ABBR and a REF. Ignore REFERENCE and ref.
12    ...
13    ... *[ABBR]: Abbreviation
14    ... *[REF]: Abbreviation Reference
15    ... """
16    >>> markdown.markdown(text, ['abbr'])
17    u'<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore REFERENCE and ref.</p>'
18
19Copyright 2007-2008
20* [Waylan Limberg](http://achinghead.com/)
21* [Seemant Kulleen](http://www.kulleen.org/)
22
23
24'''
25
26import markdown, re
27from markdown import etree
28
# Global Vars

# Matches an abbreviation definition line such as ``*[HTML]: Hyper Text ...``.
# The adjacent raw literals below are concatenated into a single pattern.
ABBR_REF_RE = re.compile(
    r'[*]\[(?P<abbr>[^\]]*)\]'  # literal "*[", the abbreviation, then "]"
    r'[ ]?:\s*'                 # optional single space, ":", any whitespace
    r'(?P<title>.*)'            # remainder of the line is the title
)
31
class AbbrExtension(markdown.Extension):
    """ Python-Markdown extension that enables abbreviation definitions. """

    def extendMarkdown(self, md, md_globals):
        """
        Register an AbbrPreprocessor on `md` under the name 'abbr'.

        The preprocessor is added with the location '<reference', i.e. ahead
        of the 'reference' preprocessor. `md_globals` is accepted but not
        used here.

        """
        preprocessor = AbbrPreprocessor(md)
        md.preprocessors.add('abbr', preprocessor, '<reference')
38
39
class AbbrPreprocessor(markdown.preprocessors.Preprocessor):
    """ Abbreviation Preprocessor - parse text for abbr references. """

    def run(self, lines):
        '''
        Find and remove all Abbreviation references from the text.
        Each reference is set as a new AbbrPattern in the markdown instance,
        stored in `self.markdown.inlinePatterns` under the key 'abbr-<ABBR>'.

        `lines` is an iterable of source lines. Returns a list of the lines
        that are not abbreviation definitions, in their original order.

        A definition whose abbreviation is empty (e.g. ``*[]: title``) is
        not registered and is left in the text, because an empty
        abbreviation would produce a pattern that matches zero characters.

        '''
        new_text = []
        for line in lines:
            m = ABBR_REF_RE.match(line)
            if m:
                abbr = m.group('abbr').strip()
                if abbr:
                    title = m.group('title').strip()
                    self.markdown.inlinePatterns['abbr-%s' % abbr] = \
                        AbbrPattern(self._generate_pattern(abbr), title)
                    # Definition consumed; drop the line from the output.
                    continue
            new_text.append(line)
        return new_text

    def _generate_pattern(self, text):
        r'''
        Given a string, returns a regex pattern to match that string.

        'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)'

        Note: we force each char as a literal match (in brackets) as we don't
        know what they will be beforehand. Each char is also passed through
        `re.escape`, since chars such as '^' or '\' are special inside a
        bracketed set and would otherwise produce an invalid pattern.

        '''
        literal = r''.join([r'[%s]' % re.escape(char) for char in text])
        # \b on both sides so 'REF' does not match inside 'REFERENCE'.
        return r'(?P<abbr>\b%s\b)' % literal
75
76
class AbbrPattern(markdown.inlinepatterns.Pattern):
    """ Inline pattern that renders a matched abbreviation as <abbr>. """

    def __init__(self, pattern, title):
        """ Initialise the base Pattern with `pattern` and keep `title`. """
        base = markdown.inlinepatterns.Pattern
        base.__init__(self, pattern)
        self.title = title

    def handleMatch(self, m):
        """
        Return an 'abbr' element for match `m`.

        The element's text is the match's 'abbr' group and its 'title'
        attribute is the title given at construction.

        """
        element = etree.Element('abbr')
        element.set('title', self.title)
        element.text = m.group('abbr')
        return element
89
def makeExtension(configs=None):
    """ Build and return an AbbrExtension, passing `configs` through. """
    extension = AbbrExtension(configs=configs)
    return extension
92
if __name__ == "__main__":
    # When executed as a script, run the doctests found in this module
    # (such as the usage example in the module docstring).
    import doctest
    doctest.testmod()
96