1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3"""
4Middleware that tests the validity of all generated HTML using the
5`WDG HTML Validator <http://www.htmlhelp.com/tools/validator/>`_
6"""
7
8from cStringIO import StringIO
9import subprocess
10from paste.response import header_value
11import re
12import cgi
13
14__all__ = ['WDGValidateMiddleware']
15
class WDGValidateMiddleware(object):

    """
    Middleware that checks HTML and appends messages about the validity of
    the HTML.  Uses: http://www.htmlhelp.com/tools/validator/ -- interacts
    with the command line client.  Use the configuration ``wdg_path`` to
    override the path (default: looks for ``validate`` in $PATH).

    The whole response body is buffered so it can be piped through the
    validator; the downstream ``start_response`` is only called once the
    final headers (including a corrected ``Content-Length``) are known.

    To install, in your web context's __init__.py::

        def urlparser_wrap(environ, start_response, app):
            return wdg_validate.WDGValidateMiddleware(app)(
                environ, start_response)

    Or in your configuration::

        middleware.append('paste.wdg_validate.WDGValidateMiddleware')
    """

    # Locates the closing body tag so errors can be inserted just before it.
    _end_body_regex = re.compile(r'</body>', re.I)

    def __init__(self, app, global_conf=None, wdg_path='validate'):
        """
        ``app`` is the wrapped WSGI application and ``wdg_path`` the
        validator executable to run.  ``global_conf`` is accepted but
        not used by this class.
        """
        self.app = app
        self.wdg_path = wdg_path

    def __call__(self, environ, start_response):
        """
        Run the wrapped application, validate its output if it is
        (X)HTML, and return the possibly annotated page as a single
        chunk.
        """
        output = StringIO()
        response = []

        def writer_start_response(status, headers, exc_info=None):
            # Only record the call.  Replacing (not extending) keeps the
            # latest values if the application calls start_response
            # again, e.g. with exc_info from an error handler.
            response[:] = [status, headers, exc_info]
            return output.write

        app_iter = self.app(environ, writer_start_response)
        try:
            for s in app_iter:
                output.write(s)
        finally:
            if hasattr(app_iter, 'close'):
                app_iter.close()
        page = output.getvalue()
        status, headers, exc_info = response
        content_type = header_value(headers, 'content-type') or ''
        # Anything that is not (X)HTML is passed through untouched.
        # @@: Should validate CSS too... but using what?
        if self._is_html(content_type):
            # @@: Should capture encoding too
            html_errors = self.call_wdg_validate(
                self.wdg_path, self._validator_options(content_type), page)
            if html_errors:
                page = self.add_error(page, html_errors)[0]
                # The page grew, so any length the application
                # announced is now wrong.
                headers = self._with_content_length(headers, len(page))
        try:
            # Forward the (single) start_response call with the final
            # headers instead of mutating a list the server already saw.
            start_response(status, headers, exc_info)
        finally:
            # Drop the traceback references to avoid a reference cycle
            # through this frame.
            exc_info = None
            del response[:]
        return [page]

    @staticmethod
    def _is_html(content_type):
        """Return True if ``content_type`` is one this middleware validates."""
        for prefix in ('text/html', 'text/xhtml', 'application/xhtml'):
            if content_type.startswith(prefix):
                return True
        return False

    @staticmethod
    def _validator_options(content_type):
        """Return the command line options to use for ``content_type``."""
        for prefix in ('text/xhtml+xml', 'application/xhtml+xml'):
            if content_type.startswith(prefix):
                return ['--xml']
        return []

    @staticmethod
    def _with_content_length(headers, length):
        """
        Return a copy of ``headers`` whose only Content-Length entry
        announces ``length``.  Existing entries are dropped regardless
        of the case of the header name; ``headers`` is not modified.
        """
        updated = [(name, value) for (name, value) in headers
                   if name.lower() != 'content-length']
        updated.append(('Content-Length', str(length)))
        return updated

    def call_wdg_validate(self, wdg_path, ops, page):
        """
        Pipe ``page`` to the ``wdg_path`` executable, invoked with the
        option list ``ops``, and return everything it printed (stderr
        is merged into stdout).
        """
        if subprocess is None:
            raise ValueError(
                "This middleware requires the subprocess module from "
                "Python 2.4")
        proc = subprocess.Popen([wdg_path] + ops,
                                shell=False,
                                close_fds=True,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        stdout = proc.communicate(page)[0]
        proc.wait()
        return stdout

    def add_error(self, html_page, html_errors):
        """
        Insert the escaped ``html_errors`` as a ``<pre>`` block just
        before ``</body>`` (or at the very end when there is no closing
        body tag).  Returns a one-element list holding the new page.
        """
        add_text = ('<pre style="background-color: #ffd; color: #600; '
                    'border: 1px solid #000;">%s</pre>'
                    % cgi.escape(html_errors))
        match = self._end_body_regex.search(html_page)
        if match:
            return [html_page[:match.start()]
                    + add_text
                    + html_page[match.start():]]
        else:
            return [html_page + add_text]
106
def make_wdg_validate_middleware(
    app, global_conf, wdg_path='validate'):
    """
    Build a ``WDGValidateMiddleware`` around ``app``.

    The middleware uses the WDG validator from
    http://www.htmlhelp.com/tools/validator/ and appends any
    validation errors to the text of each page.

    ``wdg_path`` names the ``validate`` executable; the default leaves
    the lookup to $PATH.  ``global_conf`` is handed to the middleware
    constructor unchanged.
    """
    middleware = WDGValidateMiddleware(
        app, global_conf, wdg_path=wdg_path)
    return middleware
119