16516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru#!/usr/bin/env python
26516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
"""
HTML Tidy Extension for Python-Markdown
=======================================

Runs [HTML Tidy][] on the output of Python-Markdown using the [uTidylib][]
Python wrapper. Both libtidy and uTidylib must be installed on your system.

Note that any Tidy [options][] can be passed in as extension configs. So,
for example, to output HTML rather than XHTML, set ``output_xhtml=0``. To
indent the output, set ``indent=auto`` and to have Tidy wrap the output in
``<html>`` and ``<body>`` tags, set ``show_body_only=0``.

[HTML Tidy]: http://tidy.sourceforge.net/
[uTidylib]: http://utidylib.berlios.de/
[options]: http://tidy.sourceforge.net/docs/quickref.html

Copyright (c)2008 [Waylan Limberg](http://achinghead.com)

License: [BSD](http://www.opensource.org/licenses/bsd-license.php)

Dependencies:
* [Python2.3+](http://python.org)
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
* [HTML Tidy](http://tidy.sourceforge.net/)
* [uTidylib](http://utidylib.berlios.de/)

"""
306516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
316516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queruimport markdown
326516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queruimport tidy
336516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class TidyExtension(markdown.Extension):
    """Markdown extension that installs an HTML Tidy postprocessor.

    The extension keeps a dict of Tidy options (built-in defaults merged with
    the user-supplied configs). When attached to a Markdown instance it stores
    that dict on the instance as ``tidy_options`` and registers a
    ``TidyProcessor`` under the ``'tidy'`` postprocessor key.
    """

    def __init__(self, configs=None):
        """Build the Tidy option dict.

        ``configs`` may be ``None`` (use the defaults only), an iterable of
        ``(name, value)`` pairs, or a mapping of option names to values.
        User-supplied values override the defaults.
        """
        # Set defaults to match typical markdown behavior.
        self.config = dict(output_xhtml=1,
                           show_body_only=1,
                          )
        # Merge in user defined configs, overriding any defaults if necessary.
        if configs is None:
            # The makeExtension() factory in this module passes None when no
            # configs are given; iterating None would raise TypeError.
            pairs = ()
        elif hasattr(configs, 'items'):
            # A mapping: iterate its (key, value) pairs rather than its keys.
            pairs = configs.items()
        else:
            pairs = configs
        for c in pairs:
            self.config[c[0]] = c[1]

    def extendMarkdown(self, md, md_globals):
        """Attach the Tidy options and postprocessor to ``md``.

        ``md_globals`` is accepted for the extension interface but is not
        used here.
        """
        # Save options to markdown instance
        md.tidy_options = self.config
        # Add TidyProcessor to postprocessors
        md.postprocessors['tidy'] = TidyProcessor(md)
506516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
516516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
class TidyProcessor(markdown.postprocessors.Postprocessor):
    """Postprocessor that passes the rendered document through HTML Tidy."""

    def run(self, text):
        """Return ``text`` as cleaned up by HTML Tidy, as a unicode string.

        The options stored on the Markdown instance as ``tidy_options`` are
        forwarded to ``tidy.parseString`` as keyword arguments. Unless the
        user supplied a ``char_encoding`` option, ``'utf8'`` is used, and the
        same encoding is used to encode the input and decode Tidy's output.

        NOTE: a ``char_encoding`` value that is not also a valid Python codec
        name makes the encode step raise ``LookupError``.
        """
        # Copy so the dict shared through the Markdown instance is not mutated.
        options = dict(self.markdown.tidy_options)
        # Tidy does not accept unicode, so the text is encoded before it is
        # handed over. Tell Tidy which encoding those bytes are in instead of
        # relying on Tidy's own default (NOTE(review): older Tidy releases
        # document a default of ``ascii`` -- confirm against the quickref).
        enc = options.setdefault('char_encoding', 'utf8')
        document = tidy.parseString(text.encode(enc), **options)
        # Decode with the matching codec; a bare unicode() call would fall
        # back to Python 2's default (ASCII) codec and fail on non-ASCII
        # output. NOTE(review): assumes str() of the uTidylib document yields
        # the tidied markup as a byte string -- confirm against uTidylib.
        return str(document).decode(enc)
596516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
606516b99bb74dfb7187a08f7090bf7ca22a006f15Jean-Baptiste Queru
def makeExtension(configs=None):
    """Return a ``TidyExtension`` built from ``configs``.

    ``configs`` is forwarded to ``TidyExtension``. When it is omitted (or
    ``None``) an empty list is passed instead, so the constructor never has
    to iterate ``None``.
    """
    if configs is None:
        configs = []
    return TidyExtension(configs=configs)
63