1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3# Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php
4# Licensed to PSF under a Contributor Agreement
5"""
6Middleware to check for obedience to the WSGI specification.
7
8Some of the things this checks:
9
10* Signature of the application and start_response (including that
11  keyword arguments are not used).
12
13* Environment checks:
14
15  - Environment is a dictionary (and not a subclass).
16
17  - That all the required keys are in the environment: REQUEST_METHOD,
18    SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.input, wsgi.errors,
19    wsgi.multithread, wsgi.multiprocess, wsgi.run_once
20
21  - That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the
22    environment (these headers should appear as CONTENT_LENGTH and
23    CONTENT_TYPE).
24
25  - Warns if QUERY_STRING is missing, as the cgi module acts
26    unpredictably in that case.
27
28  - That CGI-style variables (that don't contain a .) have
29    (non-unicode) string values
30
31  - That wsgi.version is a tuple
32
33  - That wsgi.url_scheme is 'http' or 'https' (@@: is this too
34    restrictive?)
35
36  - Warns if the REQUEST_METHOD is not known (@@: probably too
37    restrictive).
38
39  - That SCRIPT_NAME and PATH_INFO are empty or start with /
40
41  - That at least one of SCRIPT_NAME or PATH_INFO are set.
42
  - That CONTENT_LENGTH is a non-negative integer.
44
45  - That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should
46    be '/').
47
48  - That wsgi.input has the methods read, readline, readlines, and
49    __iter__
50
51  - That wsgi.errors has the methods flush, write, writelines
52
* The status is a string, contains a space, starts with an integer,
  and that integer is in range (>= 100).
55
56* That the headers is a list (not a subclass, not another kind of
57  sequence).
58
59* That the items of the headers are tuples of strings.
60
61* That there is no 'status' header (that is used in CGI, but not in
62  WSGI).
63
* That the header names don't contain newlines or colons and don't end
  in _ or -, and that the header values don't contain control
  characters (character codes 037 octal and below).
66
67* That Content-Type is given if there is content (CGI often has a
68  default content type, but WSGI does not).
69
70* That no Content-Type is given when there is no content (@@: is this
71  too restrictive?)
72
73* That the exc_info argument to start_response is a tuple or None.
74
75* That all calls to the writer are with strings, and no other methods
76  on the writer are accessed.
77
78* That wsgi.input is used properly:
79
80  - .read() is called with zero or one argument
81
82  - That it returns a string
83
84  - That readline, readlines, and __iter__ return strings
85
86  - That .close() is not called
87
88  - No other methods are provided
89
90* That wsgi.errors is used properly:
91
  - .write() and .writelines() are called with a string
93
94  - That .close() is not called, and no other methods are provided.
95
96* The response iterator:
97
98  - That it is not a string (it should be a list of a single string; a
99    string will work, but perform horribly).
100
101  - That .next() returns a string
102
103  - That the iterator is not iterated over until start_response has
104    been called (that can signal either a server or application
105    error).
106
107  - That .close() is called (doesn't raise exception, only prints to
108    sys.stderr, because we only know it isn't called when the object
109    is garbage collected).
110"""
111
112import re
113import six
114import sys
115import warnings
116
# Matches a header name: an ASCII letter followed by any number of ASCII
# letters, digits, hyphens or underscores.
header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$')
# Matches any control character (octal 000 through 037); searched for in
# header values by check_headers.
bad_header_value_re = re.compile(r'[\000-\037]')
119
class WSGIWarning(Warning):
    """
    Warning category for WSGI-spec-related warnings.

    Passed to ``warnings.warn`` by the checks in this module for
    conditions that are reported as warnings rather than as failed
    assertions.
    """
124
def middleware(application, global_conf=None):
    """
    Wrap ``application`` in a WSGI-compliance checker.

    When applied between a WSGI server and a WSGI application, this
    middleware will check for WSGI compliancy on a number of levels.
    The response is passed through unchanged, but ``wsgi.input`` and
    ``wsgi.errors`` in the environment are replaced by checking
    wrappers.  An AssertionError is thrown if anything seems off
    (except for a failure to close the application iterator, which
    will be printed to stderr -- there's no way to throw an exception
    at that point).

    ``application`` is the WSGI application to wrap.  ``global_conf``
    is accepted for compatibility and is not used.

    Returns a WSGI application callable.
    """

    def lint_app(*args, **kw):
        assert len(args) == 2, "Two arguments required"
        assert not kw, "No keyword arguments allowed"
        environ, start_response = args

        check_environ(environ)

        # We use this to check if the application returns without
        # calling start_response: the wrapper below appends to it, and
        # IteratorWrapper inspects it when the body is first read.
        start_response_started = []

        def start_response_wrapper(*args, **kw):
            # ``args`` is itself a tuple, so it has to be wrapped in a
            # 1-tuple; formatting with the bare tuple would raise
            # TypeError instead of reporting the problem.
            assert len(args) == 2 or len(args) == 3, (
                "Invalid number of arguments: %s" % (args,))
            assert not kw, "No keyword arguments allowed"
            status = args[0]
            headers = args[1]
            exc_info = args[2] if len(args) == 3 else None

            check_status(status)
            check_headers(headers)
            check_content_type(status, headers)
            check_exc_info(exc_info)

            start_response_started.append(None)
            return WriteWrapper(start_response(*args))

        environ['wsgi.input'] = InputWrapper(environ['wsgi.input'])
        environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors'])

        iterator = application(environ, start_response_wrapper)
        assert iterator is not None and iterator != False, (
            "The application must return an iterator, if only an empty list")

        check_iterator(iterator)

        return IteratorWrapper(iterator, start_response_started)

    return lint_app
179
class InputWrapper(object):
    """
    Checking proxy around ``wsgi.input``.

    Each read method is forwarded to the wrapped stream; the call
    signature and the type of the returned data are asserted on the
    way through.
    """

    def __init__(self, wsgi_input):
        self.input = wsgi_input

    def read(self, *args):
        """Forward ``read([size])`` and assert the result is a bytestring."""
        assert len(args) <= 1, (
            "input.read() takes at most one argument (got %r)" % (args,))
        v = self.input.read(*args)
        assert isinstance(v, bytes), (
            "input.read() must return a bytestring (got %r)" % (v,))
        return v

    def readline(self, *args):
        """Forward ``readline([size])`` and assert the result is a bytestring."""
        # Same arity check as read() and readlines().
        assert len(args) <= 1, (
            "input.readline() takes at most one argument (got %r)" % (args,))
        v = self.input.readline(*args)
        assert isinstance(v, bytes), (
            "input.readline() must return a bytestring (got %r)" % (v,))
        return v

    def readlines(self, *args):
        """Forward ``readlines([hint])`` and assert a list of bytestrings."""
        assert len(args) <= 1, (
            "input.readlines() takes at most one argument (got %r)" % (args,))
        lines = self.input.readlines(*args)
        assert isinstance(lines, list), (
            "input.readlines() must return a list (got %r)" % (lines,))
        for line in lines:
            assert isinstance(line, bytes), (
                "input.readlines() must return bytestrings (got %r)"
                % (line,))
        return lines

    def __iter__(self):
        # Iterate through our own readline() so every line is checked;
        # an empty result marks the end of the input.
        while True:
            line = self.readline()
            if not line:
                return
            yield line

    def close(self):
        assert 0, "input.close() must not be called"
213
class ErrorWrapper(object):
    """
    Checking proxy around ``wsgi.errors``.

    Writes are forwarded to the wrapped stream after asserting that the
    data is a native string; ``close()`` always fails.
    """

    def __init__(self, wsgi_errors):
        self.errors = wsgi_errors

    def write(self, s):
        """Assert ``s`` is a native string and write it to the stream."""
        # wsgi.errors is a text-mode stream, so it takes native strings
        # (``str``), not bytestrings.  On Python 2 ``str`` and ``bytes``
        # are the same type, so this only changes behaviour on Python 3.
        assert isinstance(s, str), (
            "errors.write() must be called with a string (got %r)" % (s,))
        self.errors.write(s)

    def flush(self):
        """Flush the wrapped stream."""
        self.errors.flush()

    def writelines(self, seq):
        """Write each item of ``seq`` through :meth:`write` so it is checked."""
        for line in seq:
            self.write(line)

    def close(self):
        assert 0, "errors.close() must not be called"
232
class WriteWrapper(object):
    """
    Checking proxy around the ``write`` callable returned by
    ``start_response``.

    Only calling the wrapper is supported; no other attributes of the
    wrapped writer are exposed.
    """

    def __init__(self, wsgi_writer):
        self.writer = wsgi_writer

    def __call__(self, s):
        """Assert ``s`` is a bytestring, then pass it to the real writer."""
        assert isinstance(s, bytes), (
            "write() must be called with a bytestring (got %r)" % (s,))
        self.writer(s)
241
class PartialIteratorWrapper(object):
    """
    Iterable that defers creation of the checking iterator until
    ``__iter__`` is actually called.
    """

    def __init__(self, wsgi_iterator):
        self.iterator = wsgi_iterator

    def __iter__(self):
        # We want to make sure __iter__ is called.
        # IteratorWrapper requires a second argument; None means there is
        # no start_response bookkeeping to verify for this iterator.
        return IteratorWrapper(self.iterator, None)
250
class IteratorWrapper(object):
    """
    Checking proxy around the application's response iterable.

    ``check_start_response`` is either None (no check) or a list that
    is non-empty once ``start_response`` has been called; it is
    inspected after the first item has been fetched.
    """

    def __init__(self, wsgi_iterator, check_start_response):
        self.original_iterator = wsgi_iterator
        self.iterator = iter(wsgi_iterator)
        self.closed = False
        self.check_start_response = check_start_response

    def __iter__(self):
        return self

    def next(self):
        """Return the next body chunk, asserting that it is a bytestring."""
        assert not self.closed, (
            "Iterator read after closed")
        # Fetch first: a generator application only calls start_response
        # once its body starts running.
        v = next(self.iterator)
        if self.check_start_response is not None:
            assert self.check_start_response, (
                "The application returns and we started iterating over its body, but start_response has not yet been called")
            self.check_start_response = None
        assert isinstance(v, bytes), (
            "Iterator %r returned a non-%r object: %r"
            % (self.iterator, bytes, v))
        return v

    __next__ = next

    def close(self):
        """Mark the wrapper closed and close the original iterable if it can be."""
        self.closed = True
        if hasattr(self.original_iterator, 'close'):
            self.original_iterator.close()

    def __del__(self):
        # ``closed`` is missing when __init__ failed part-way (for example
        # iter() raised); there is nothing to report in that case.
        # Exceptions cannot propagate out of __del__, so the problem is
        # only written to stderr.
        if not getattr(self, 'closed', True):
            sys.stderr.write(
                "Iterator garbage collected without being closed\n")
def check_environ(environ):
    """
    Assert that ``environ`` is an acceptable WSGI environment.

    Checks that it is a dictionary, that the required keys are present,
    that CGI-style values (keys without a ``.``) are strings, and that
    ``wsgi.version``, ``wsgi.url_scheme``, ``wsgi.input``,
    ``wsgi.errors``, ``SCRIPT_NAME``, ``PATH_INFO`` and
    ``CONTENT_LENGTH`` have acceptable values.

    Raises AssertionError on a violation.  A missing ``QUERY_STRING``
    or an unrecognised ``REQUEST_METHOD`` only produces a WSGIWarning.
    """
    assert isinstance(environ, dict), (
        "Environment is not of the right type: %r (environment: %r)"
        % (type(environ), environ))

    # wsgi.url_scheme is read unconditionally further down, so it is
    # required here; otherwise a missing scheme would surface as a
    # KeyError instead of an assertion.
    for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
                'wsgi.version', 'wsgi.url_scheme', 'wsgi.input',
                'wsgi.errors', 'wsgi.multithread', 'wsgi.multiprocess',
                'wsgi.run_once']:
        assert key in environ, (
            "Environment missing required key: %r" % key)

    for key in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']:
        assert key not in environ, (
            "Environment should not have the key: %s "
            "(use %s instead)" % (key, key[5:]))

    if 'QUERY_STRING' not in environ:
        warnings.warn(
            'QUERY_STRING is not in the WSGI environment; the cgi '
            'module will use sys.argv when this variable is missing, '
            'so application errors are more likely',
            WSGIWarning)

    for key, value in environ.items():
        if '.' in key:
            # Extension, we don't care about its type
            continue
        assert isinstance(value, str), (
            "Environmental variable %s is not a string: %r (value: %r)"
            % (key, type(value), value))

    # Values are wrapped in 1-tuples so that formatting the message
    # cannot itself fail when the offending value is a tuple.
    assert isinstance(environ['wsgi.version'], tuple), (
        "wsgi.version should be a tuple (%r)" % (environ['wsgi.version'],))
    assert environ['wsgi.url_scheme'] in ('http', 'https'), (
        "wsgi.url_scheme unknown: %r" % (environ['wsgi.url_scheme'],))

    check_input(environ['wsgi.input'])
    check_errors(environ['wsgi.errors'])

    # @@: these need filling out:
    if environ['REQUEST_METHOD'] not in (
            'GET', 'HEAD', 'POST', 'OPTIONS', 'PUT', 'DELETE', 'TRACE',
            'PATCH', 'CONNECT'):
        warnings.warn(
            "Unknown REQUEST_METHOD: %r" % environ['REQUEST_METHOD'],
            WSGIWarning)

    assert (not environ.get('SCRIPT_NAME')
            or environ['SCRIPT_NAME'].startswith('/')), (
        "SCRIPT_NAME doesn't start with /: %r" % environ['SCRIPT_NAME'])
    assert (not environ.get('PATH_INFO')
            or environ['PATH_INFO'].startswith('/')), (
        "PATH_INFO doesn't start with /: %r" % environ['PATH_INFO'])

    content_length = environ.get('CONTENT_LENGTH')
    if content_length:
        # A non-numeric value is reported as an assertion failure like
        # every other violation, not as a ValueError from int().
        try:
            length = int(content_length)
        except ValueError:
            length = -1
        assert length >= 0, (
            "Invalid CONTENT_LENGTH: %r" % content_length)

    if not environ.get('SCRIPT_NAME'):
        assert 'PATH_INFO' in environ, (
            "One of SCRIPT_NAME or PATH_INFO are required (PATH_INFO "
            "should at least be '/' if SCRIPT_NAME is empty)")
    assert environ.get('SCRIPT_NAME') != '/', (
        "SCRIPT_NAME cannot be '/'; it should instead be '', and "
        "PATH_INFO should be '/'")
350
def check_input(wsgi_input):
    """
    Assert that ``wsgi_input`` exposes ``read``, ``readline``,
    ``readlines`` and ``__iter__``.

    Raises AssertionError naming the first missing attribute.
    """
    required = ('read', 'readline', 'readlines', '__iter__')
    for method_name in required:
        if hasattr(wsgi_input, method_name):
            continue
        assert False, (
            "wsgi.input (%r) doesn't have the attribute %s"
            % (wsgi_input, method_name))
356
def check_errors(wsgi_errors):
    """
    Assert that ``wsgi_errors`` exposes ``flush``, ``write`` and
    ``writelines``.

    Raises AssertionError naming the first missing attribute.
    """
    required = ('flush', 'write', 'writelines')
    for method_name in required:
        if hasattr(wsgi_errors, method_name):
            continue
        assert False, (
            "wsgi.errors (%r) doesn't have the attribute %s"
            % (wsgi_errors, method_name))
362
def check_status(status):
    """
    Assert that ``status`` is a well-formed WSGI status string.

    The status must be a string whose first whitespace-separated token
    is a three-character integer of at least 100.  A WSGIWarning is
    issued when the fourth character is not a space.

    Raises AssertionError for every violation, including an empty
    status and a non-numeric status code.
    """
    # The value is wrapped in a 1-tuple so that a tuple status is
    # reported instead of breaking the string formatting.
    assert isinstance(status, str), (
        "Status must be a string (not %r)" % (status,))
    parts = status.split(None, 1)
    assert parts, "Status must not be empty: %r" % status
    status_code = parts[0]
    assert len(status_code) == 3, (
        "Status codes must be three characters: %r" % status_code)
    try:
        status_int = int(status_code)
    except ValueError:
        status_int = None
    assert status_int is not None, (
        "Status code is not an integer: %r" % status_code)
    assert status_int >= 100, "Status code is invalid: %r" % status_int
    if len(status) < 4 or status[3] != ' ':
        warnings.warn(
            "The status string (%r) should be a three-digit integer "
            "followed by a single space and a status explanation"
            % status, WSGIWarning)
377
def check_headers(headers):
    """
    Assert that ``headers`` is a valid WSGI response header list.

    ``headers`` must be a list of 2-tuples of strings.  A ``Status``
    header is rejected, header names must match ``header_re``, must not
    contain ``:`` or a newline and must not end in ``-`` or ``_``, and
    header values must not match ``bad_header_value_re``.

    Raises AssertionError on the first violation found.
    """
    assert isinstance(headers, list), (
        "Headers (%r) must be of type list: %r"
        % (headers, type(headers)))
    for item in headers:
        assert isinstance(item, tuple), (
            "Individual headers (%r) must be of type tuple: %r"
            % (item, type(item)))
        assert len(item) == 2, (
            "Individual headers (%r) must be (name, value) pairs" % (item,))
        name, value = item
        # Type checks come first so that a non-string name or value is
        # reported as an assertion failure rather than as an
        # AttributeError or TypeError from the checks below.
        assert isinstance(name, str), (
            "Header names must be strings: %r (type: %r)"
            % (name, type(name)))
        assert isinstance(value, str), (
            "Header values must be strings: %r (type: %r)"
            % (value, type(value)))
        assert name.lower() != 'status', (
            "The Status header cannot be used; it conflicts with CGI "
            "script, and HTTP status is not given through headers "
            "(value: %r)." % value)
        assert '\n' not in name and ':' not in name, (
            "Header names may not contain ':' or '\\n': %r" % name)
        assert header_re.search(name), "Bad header name: %r" % name
        assert not name.endswith('-') and not name.endswith('_'), (
            "Names may not end in '-' or '_': %r" % name)
        bad_char = bad_header_value_re.search(value)
        assert not bad_char, (
            "Bad header value: %r (bad char: %r)"
            % (value, bad_char.group(0)))
402
def check_content_type(status, headers):
    """
    Assert that a Content-Type header is present exactly when the
    status allows a message body.

    Responses with status 204 or 304 must not carry a Content-Type
    header; every other status must carry one.  Raises AssertionError
    otherwise.
    """
    code = int(status.split(None, 1)[0])
    # @@: need one more person to verify this interpretation of RFC 2616
    #     http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
    bodyless_codes = (204, 304)
    has_content_type = any(
        name.lower() == 'content-type' for name, _value in headers)
    if has_content_type:
        assert code not in bodyless_codes, (
            ("Content-Type header found in a %s response, "
             "which must not return content.") % code)
        return
    assert code in bodyless_codes, (
        "No Content-Type header found in headers (%s)" % headers)
417
def check_exc_info(exc_info):
    """
    Assert that ``exc_info`` is either None or exactly a tuple
    (subclasses of tuple are not accepted).
    """
    if exc_info is None:
        return
    assert type(exc_info) is tuple, (
        "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info)))
    # More exc_info checks?
422
def check_iterator(iterator):
    """
    Assert that the application did not return a bare string as its
    response iterable.

    Raises AssertionError for ``str`` and ``bytes`` objects.
    """
    # Technically a string is legal, which is why it's a really bad
    # idea, because it may cause the response to be returned
    # character-by-character.  ``bytes`` is checked as well: on Python 3
    # it is a separate type from ``str`` and iterating it yields
    # integers rather than bytestrings.
    assert not isinstance(iterator, (str, bytes)), (
        "You should not return a string as your application iterator, "
        "instead return a single-item list containing that string.")
430
def make_middleware(application, global_conf):
    # Factory entry point: wraps ``application`` with the lint
    # middleware.  ``global_conf`` is accepted but not forwarded.
    # @@: global_conf should be taken out of the middleware function,
    # and isolated here
    lint_app = middleware(application)
    return lint_app
435
# Give the factory the module docstring, which describes the checks
# the middleware performs.
make_middleware.__doc__ = __doc__

# Names exported by ``from <module> import *``.
__all__ = ['middleware', 'make_middleware']
439