# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""
Middleware that tests the validity of all generated HTML using the
`WDG HTML Validator <http://www.htmlhelp.com/tools/validator/>`_
"""

from cStringIO import StringIO
import subprocess
from paste.response import header_value
import re
import cgi

__all__ = ['WDGValidateMiddleware']


class WDGValidateMiddleware(object):

    """
    Middleware that checks HTML and appends messages about the validity of
    the HTML.  Uses: http://www.htmlhelp.com/tools/validator/ -- interacts
    with the command line client.  Use the configuration ``wdg_path`` to
    override the path (default: looks for ``validate`` in $PATH).

    To install, in your web context's __init__.py::

        def urlparser_wrap(environ, start_response, app):
            return wdg_validate.WDGValidateMiddleware(app)(
                environ, start_response)

    Or in your configuration::

        middleware.append('paste.wdg_validate.WDGValidateMiddleware')
    """

    # Used to find where the error report is spliced into the page.
    _end_body_regex = re.compile(r'</body>', re.I)

    def __init__(self, app, global_conf=None, wdg_path='validate'):
        """
        Wrap ``app``.

        ``global_conf`` is accepted but not used by this class.
        ``wdg_path`` is the validator executable to run.
        """
        self.app = app
        self.wdg_path = wdg_path

    def __call__(self, environ, start_response):
        """
        WSGI entry point.

        Runs the wrapped application, buffers its whole body, and -- when
        the response Content-Type starts with ``text/html``,
        ``text/xhtml`` or ``application/xhtml`` -- pipes the body through
        the validator.  If the validator prints anything, that output is
        added to the page and Content-Length is updated.  Returns a
        one-element list containing the (possibly modified) page.
        """
        output = StringIO()
        response = []

        def writer_start_response(status, headers, exc_info=None):
            # WSGI lets an application call start_response again (with
            # exc_info) to replace earlier headers, so keep only the most
            # recent status/headers pair instead of accumulating them.
            response[:] = [status, headers]
            start_response(status, headers, exc_info)
            return output.write

        app_iter = self.app(environ, writer_start_response)
        try:
            for s in app_iter:
                output.write(s)
        finally:
            if hasattr(app_iter, 'close'):
                app_iter.close()
        page = output.getvalue()
        status, headers = response
        v = header_value(headers, 'content-type') or ''
        if (not v.startswith('text/html')
            and not v.startswith('text/xhtml')
            and not v.startswith('application/xhtml')):
            # Can't validate
            # @@: Should validate CSS too... but using what?
            return [page]
        ops = []
        # NOTE(review): ``application/xhtml+xml`` passes the check above but
        # does not get ``--xml`` here -- confirm whether that is intended.
        if v.startswith('text/xhtml+xml'):
            ops.append('--xml')
        # @@: Should capture encoding too
        html_errors = self.call_wdg_validate(
            self.wdg_path, ops, page)
        if html_errors:
            page = self.add_error(page, html_errors)[0]
            # Drop any existing Content-Length regardless of header-name
            # case, and do not fail when the application sent none.  The
            # list is edited in place because this same list object was
            # already handed to start_response above.
            headers[:] = [(name, value) for (name, value) in headers
                          if name.lower() != 'content-length']
            headers.append(('Content-Length', str(len(page))))
        return [page]

    def call_wdg_validate(self, wdg_path, ops, page):
        """
        Run ``wdg_path`` with the options ``ops``, feeding ``page`` on
        stdin.

        Returns everything the process wrote to stdout and stderr
        (stderr is redirected into stdout).
        """
        if subprocess is None:
            raise ValueError(
                "This middleware requires the subprocess module from "
                "Python 2.4")
        proc = subprocess.Popen([wdg_path] + ops,
                                shell=False,
                                close_fds=True,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # communicate() writes the input, reads until EOF and waits for
        # the process to exit, so no separate wait() call is needed.
        return proc.communicate(page)[0]

    def add_error(self, html_page, html_errors):
        """
        Return a one-element list holding ``html_page`` with the escaped
        ``html_errors`` added in a ``<pre>`` block.

        The block goes immediately before the first ``</body>`` tag
        (matched case-insensitively), or at the very end of the page when
        there is no such tag.
        """
        add_text = ('<pre style="background-color: #ffd; color: #600; '
                    'border: 1px solid #000;">%s</pre>'
                    % cgi.escape(html_errors))
        match = self._end_body_regex.search(html_page)
        if match:
            return [html_page[:match.start()]
                    + add_text
                    + html_page[match.start():]]
        else:
            return [html_page + add_text]


def make_wdg_validate_middleware(
    app, global_conf, wdg_path='validate'):
    """
    Wraps the application in the WDG validator from
    http://www.htmlhelp.com/tools/validator/

    Validation errors are appended to the text of each page.
    You can configure this by giving the path to the validate
    executable (by default picked up from $PATH)
    """
    return WDGValidateMiddleware(
        app, global_conf, wdg_path=wdg_path)