1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3# (c) 2005 Ian Bicking and contributors
4# This module is part of the Python Paste Project and is released under
5# the MIT License: http://www.opensource.org/licenses/mit-license.php
6"""
This module provides helper routines that work directly on a WSGI
environment to solve common requirements.
9
10   * get_cookies(environ)
11   * parse_querystring(environ)
12   * parse_formvars(environ, include_get_vars=True)
13   * construct_url(environ, with_query_string=True, with_path_info=True,
14                   script_name=None, path_info=None, querystring=None)
15   * path_info_split(path_info)
16   * path_info_pop(environ)
17   * resolve_relative_url(url, environ)
18
19"""
20import cgi
21from six.moves.urllib import parse as urlparse
22from six.moves.urllib.parse import quote, parse_qsl
23try:
24    # Python 3
25    from http.cookies import SimpleCookie, CookieError
26except ImportError:
27    # Python 2
28    from Cookie import SimpleCookie, CookieError
29
30try:
31    from UserDict import DictMixin
32except ImportError:
33    from collections import MutableMapping as DictMixin
34import six
35
36from paste.util.multidict import MultiDict
37
# Names exported by a star-import of this module.
# NOTE(review): parse_dict_querystring and parse_headers are defined in
# this module but are not listed here -- confirm whether that is intentional.
__all__ = ['get_cookies', 'get_cookie_dict', 'parse_querystring',
           'parse_formvars', 'construct_url', 'path_info_split',
           'path_info_pop', 'resolve_relative_url', 'EnvironHeaders']
41
def get_cookies(environ):
    """
    Return the request's cookies as a ``SimpleCookie`` (a
    dictionary-like object) parsed from ``HTTP_COOKIE``.

    The parsed object is stored in ``environ['paste.cookies']`` next to
    the header text it was built from, so a later call for the same
    request reuses it as long as the header is unchanged.

    """
    raw_header = environ.get('HTTP_COOKIE', '')
    try:
        cached_jar, cached_header = environ['paste.cookies']
    except KeyError:
        # Nothing cached yet for this request.
        pass
    else:
        if cached_header == raw_header:
            return cached_jar
    jar = SimpleCookie()
    try:
        jar.load(raw_header)
    except CookieError:
        # A malformed Cookie header is deliberately tolerated.
        pass
    environ['paste.cookies'] = (jar, raw_header)
    return jar
61
def get_cookie_dict(environ):
    """Return the request's cookies as a *plain* ``{name: value}`` dict.

    ``get_cookies`` hands back a ``SimpleCookie``; this function
    flattens it to an ordinary dictionary, which is all the information
    an incoming cookie carries.  The result is cached in
    ``environ['paste.cookies.dict']`` together with the header it was
    parsed from and reused while that header is unchanged.  When there
    is no ``HTTP_COOKIE`` header a new empty dict is returned and
    nothing is cached.
    """
    raw_header = environ.get('HTTP_COOKIE')
    if not raw_header:
        return {}
    cache_key = 'paste.cookies.dict'
    if cache_key in environ:
        cached, cached_header = environ[cache_key]
        if cached_header == raw_header:
            return cached
    jar = SimpleCookie()
    try:
        jar.load(raw_header)
    except CookieError:
        # A malformed Cookie header is deliberately tolerated.
        pass
    plain = {name: jar[name].value for name in jar}
    environ[cache_key] = (plain, raw_header)
    return plain
87
def parse_querystring(environ):
    """
    Parse ``QUERY_STRING`` into a list of ``(name, value)`` pairs.

    Blank values are kept.  The list is cached in
    ``environ['paste.parsed_querystring']`` along with the query string
    it came from, so calling this again for the same request returns
    the same list.  An empty or missing query string yields a new empty
    list and is not cached.

    The result can be passed to ``dict()``, but a key that appears more
    than once will then keep only its last value.

    """
    query = environ.get('QUERY_STRING', '')
    if not query:
        return []
    try:
        pairs, cached_query = environ['paste.parsed_querystring']
    except KeyError:
        # Nothing cached yet for this request.
        pass
    else:
        if cached_query == query:
            return pairs
    pairs = parse_qsl(query, keep_blank_values=True, strict_parsing=False)
    environ['paste.parsed_querystring'] = (pairs, query)
    return pairs
110
def parse_dict_querystring(environ):
    """Parses a query string like parse_querystring, but returns a MultiDict

    The MultiDict is cached in
    ``environ['paste.parsed_dict_querystring']`` together with the query
    string it was built from, so a repeated call for the same request
    returns the same object.  An empty or missing query string yields a
    new, empty MultiDict and is not cached.

    Example::

        >>> environ = {'QUERY_STRING': 'day=Monday&user=fred&user=jane'}
        >>> parsed = parse_dict_querystring(environ)

        >>> parsed['day']
        'Monday'
        >>> parsed['user']
        'fred'
        >>> parsed.getall('user')
        ['fred', 'jane']

    """
    query = environ.get('QUERY_STRING', '')
    if not query:
        return MultiDict()
    cache_key = 'paste.parsed_dict_querystring'
    if cache_key in environ:
        cached, cached_query = environ[cache_key]
        if cached_query == query:
            return cached
    pairs = parse_qsl(query, keep_blank_values=True, strict_parsing=False)
    result = MultiDict(pairs)
    environ[cache_key] = (result, query)
    return result
142
def parse_formvars(environ, include_get_vars=True):
    """Parses the request, returning a MultiDict of form variables.

    If ``include_get_vars`` is true then GET (query string) variables
    will also be folded into the MultiDict.

    All values should be strings, except for file uploads which are
    left as ``FieldStorage`` instances.

    If the request was not a normal form request (e.g., a POST with an
    XML body) then ``environ['wsgi.input']`` won't be read.

    The result is cached in ``environ['paste.parsed_formvars']``
    together with the ``wsgi.input`` object it was parsed from.

    Side effects on ``environ``: a missing or empty ``CONTENT_LENGTH``
    is set to ``'0'`` and a missing ``QUERY_STRING`` ends up as ``''``.
    While ``cgi.FieldStorage`` runs, ``QUERY_STRING`` (and, for non-form
    bodies, ``CONTENT_TYPE`` / ``CONTENT_LENGTH``) are temporarily
    overridden; they are put back even if parsing raises.
    """
    source = environ['wsgi.input']
    if 'paste.parsed_formvars' in environ:
        parsed, check_source = environ['paste.parsed_formvars']
        if check_source == source:
            if include_get_vars:
                parsed.update(parse_querystring(environ))
            return parsed
    # @@: Shouldn't bother FieldStorage parsing during GET/HEAD and
    # fake_out_cgi requests
    content_type = environ.get('CONTENT_TYPE', '').lower()
    if ';' in content_type:
        # Drop parameters such as "; charset=..." or "; boundary=...".
        content_type = content_type.split(';', 1)[0]
    # Anything that is not a form submission must not be consumed from
    # wsgi.input; FieldStorage is fed an empty body instead.
    fake_out_cgi = content_type not in ('',
                                        'application/x-www-form-urlencoded',
                                        'multipart/form-data')
    # FieldStorage assumes a default CONTENT_LENGTH of -1, but a
    # default of 0 is better:
    if not environ.get('CONTENT_LENGTH'):
        environ['CONTENT_LENGTH'] = '0'
    # Prevent FieldStorage from parsing QUERY_STRING during GET/HEAD
    # requests
    old_query_string = environ.get('QUERY_STRING', '')
    environ['QUERY_STRING'] = ''
    if fake_out_cgi:
        body = six.BytesIO(b'')
        old_content_type = environ.get('CONTENT_TYPE')
        old_content_length = environ.get('CONTENT_LENGTH')
        environ['CONTENT_LENGTH'] = '0'
        environ['CONTENT_TYPE'] = ''
    else:
        body = source
    try:
        fs = cgi.FieldStorage(fp=body,
                              environ=environ,
                              keep_blank_values=1)
    finally:
        # Undo the temporary overrides whether or not parsing succeeded,
        # so a failed parse does not leave the request environment with
        # a blanked query string or content type.
        environ['QUERY_STRING'] = old_query_string
        if fake_out_cgi:
            environ['CONTENT_TYPE'] = old_content_type
            environ['CONTENT_LENGTH'] = old_content_length
    formvars = MultiDict()
    if isinstance(fs.value, list):
        for name in fs.keys():
            values = fs[name]
            if not isinstance(values, list):
                values = [values]
            for value in values:
                # Fields without a filename are reduced to their value;
                # uploads stay as FieldStorage objects.
                if not value.filename:
                    value = value.value
                formvars.add(name, value)
    # NOTE(review): the cached object is the same one the GET variables
    # are folded into below, so presumably a later call with
    # include_get_vars=False still sees them -- confirm against
    # MultiDict.update semantics.
    environ['paste.parsed_formvars'] = (formvars, source)
    if include_get_vars:
        formvars.update(parse_querystring(environ))
    return formvars
206
def construct_url(environ, with_query_string=True, with_path_info=True,
                  script_name=None, path_info=None, querystring=None):
    """Reconstructs the URL from the WSGI environment.

    You may override SCRIPT_NAME, PATH_INFO, and QUERYSTRING with
    the keyword arguments.

    The host comes from ``HTTP_HOST`` when that is set and non-empty,
    otherwise from ``SERVER_NAME`` / ``SERVER_PORT``.  The port is
    omitted when it is the scheme's default (80 for http, 443 for
    https).  Bracketed IPv6 literals in ``HTTP_HOST`` such as
    ``[::1]:8080`` are understood: only a colon following the closing
    bracket is treated as the port separator.

    SCRIPT_NAME and PATH_INFO are URL-quoted; the query string is
    appended as-is.  ``with_path_info`` and ``with_query_string``
    switch those two components off entirely.

    """
    scheme = environ['wsgi.url_scheme']
    url = scheme + '://'

    if environ.get('HTTP_HOST'):
        host = environ['HTTP_HOST']
        port = None
        if host.startswith('['):
            # IPv6 literal: the colons inside the brackets belong to the
            # address, so look for the port only after the ']'.
            literal, closing, remainder = host.partition(']')
            if closing and remainder.startswith(':'):
                host = literal + closing
                port = remainder[1:]
        elif ':' in host:
            host, port = host.split(':', 1)
        # Leave out the port when it is the default for the scheme.
        if ((scheme == 'https' and port == '443')
                or (scheme == 'http' and port == '80')):
            port = None
        url += host
        if port:
            url += ':%s' % port
    else:
        url += environ['SERVER_NAME']
        # Any scheme other than https is compared against port 80.
        default_port = '443' if scheme == 'https' else '80'
        if environ['SERVER_PORT'] != default_port:
            url += ':' + environ['SERVER_PORT']

    if script_name is None:
        url += quote(environ.get('SCRIPT_NAME', ''))
    else:
        url += quote(script_name)
    if with_path_info:
        if path_info is None:
            url += quote(environ.get('PATH_INFO', ''))
        else:
            url += quote(path_info)
    if with_query_string:
        if querystring is None:
            if environ.get('QUERY_STRING'):
                url += '?' + environ['QUERY_STRING']
        elif querystring:
            # An explicitly passed empty querystring suppresses the '?'.
            url += '?' + querystring
    return url
256
def resolve_relative_url(url, environ):
    """
    Join ``url`` onto the URL of the current request.

    The base is the request URL rebuilt from ``environ`` without its
    query string, which makes this suitable for redirecting to a
    relative path.  Note: a ``url`` that is already absolute is
    (intentionally) left unaffected.

    """
    return urlparse.urljoin(
        construct_url(environ, with_query_string=False), url)
268
def path_info_split(path_info):
    """
    Split the first segment off ``path_info``.

    Returns ``(first_part, rest_of_path)``.  ``first_part`` is None
    when PATH_INFO is empty, ``''`` when PATH_INFO is ``'/'``, and
    otherwise a name containing no ``/``.  ``rest_of_path`` is either
    ``''`` or a string starting with ``/``.  All leading slashes are
    skipped before the split.

    """
    if not path_info:
        return None, ''
    assert path_info.startswith('/'), (
        "PATH_INFO should start with /: %r" % path_info)
    head, slash, tail = path_info.lstrip('/').partition('/')
    # ``slash`` is '' when no further '/' exists, which makes the rest
    # come out as '' in that case and as '/' + tail otherwise.
    return head, slash + tail
287
def path_info_pop(environ):
    """
    'Pops' off the next segment of PATH_INFO, pushing it onto
    SCRIPT_NAME, and returning that segment.

    Returns None, leaving ``environ`` untouched, when PATH_INFO is
    empty or missing.  A missing SCRIPT_NAME is treated as ``''``,
    the same way a missing PATH_INFO is.  Leading slashes of PATH_INFO
    are moved onto SCRIPT_NAME along with the segment.

    For instance::

        >>> def call_it(script_name, path_info):
        ...     env = {'SCRIPT_NAME': script_name, 'PATH_INFO': path_info}
        ...     result = path_info_pop(env)
        ...     print('SCRIPT_NAME=%r; PATH_INFO=%r; returns=%r' % (
        ...         env['SCRIPT_NAME'], env['PATH_INFO'], result))
        >>> call_it('/foo', '/bar')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns='bar'
        >>> call_it('/foo/bar', '')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns=None
        >>> call_it('/foo/bar', '/')
        SCRIPT_NAME='/foo/bar/'; PATH_INFO=''; returns=''
        >>> call_it('', '/1/2/3')
        SCRIPT_NAME='/1'; PATH_INFO='/2/3'; returns='1'
        >>> call_it('', '//1/2')
        SCRIPT_NAME='//1'; PATH_INFO='/2'; returns='1'

    """
    path = environ.get('PATH_INFO', '')
    if not path:
        return None
    # Read with a default so an environ that omits an empty SCRIPT_NAME
    # does not raise KeyError.
    script_name = environ.get('SCRIPT_NAME', '')
    stripped = path.lstrip('/')
    leading_slashes = path[:len(path) - len(stripped)]
    segment, slash, rest = stripped.partition('/')
    environ['SCRIPT_NAME'] = script_name + leading_slashes + segment
    # ``slash`` is '' when the popped segment was the last one, so
    # PATH_INFO becomes '' then and '/' + rest otherwise.
    environ['PATH_INFO'] = slash + rest
    return segment
327
# CGI variables whose header name cannot be derived by stripping an
# ``HTTP_`` prefix.
_parse_headers_special = {
    # This is a Zope convention, but we'll allow it here:
    'HTTP_CGI_AUTHORIZATION': 'Authorization',
    'CONTENT_LENGTH': 'Content-Length',
    'CONTENT_TYPE': 'Content-Type',
    }

def parse_headers(environ):
    """
    Parse the headers in the environment (like ``HTTP_HOST``) and
    yield a sequence of those (header_name, value) tuples.

    Keys listed in ``_parse_headers_special`` are mapped to their fixed
    header name; any other key starting with ``HTTP_`` has the prefix
    removed, is title-cased and has underscores turned into dashes
    (``HTTP_X_FORWARDED_FOR`` -> ``X-Forwarded-For``).  All remaining
    keys are skipped.
    """
    # @@: Maybe should parse out comma-separated headers?
    # ``items()`` exists on both Python 2 and 3 mappings, whereas
    # ``iteritems()`` is Python 2 only.
    for cgi_var, value in environ.items():
        if cgi_var in _parse_headers_special:
            yield _parse_headers_special[cgi_var], value
        elif cgi_var.startswith('HTTP_'):
            yield cgi_var[5:].title().replace('_', '-'), value
346
class EnvironHeaders(DictMixin):
    """An object that represents the headers as present in a
    WSGI environment.

    This object is a wrapper (with no internal state) for a WSGI
    request object, representing the CGI-style HTTP_* keys as a
    dictionary.  Because a CGI environment can only hold one value for
    each key, this dictionary is single-valued (unlike outgoing
    headers).

    Header names given by the caller are case-insensitive; names
    produced by iteration are title-cased (``Content-Type``).  Reads,
    writes and deletes go straight through to the wrapped ``environ``.
    """

    def __init__(self, environ):
        # The wrapped environ is the only state; it is never copied.
        self.environ = environ

    def _trans_name(self, name):
        """Translate a header name (``Content-Type``) into its environ
        key (``CONTENT_TYPE``, or ``HTTP_<NAME>`` for other headers)."""
        key = 'HTTP_'+name.replace('-', '_').upper()
        # These two headers are stored without the HTTP_ prefix.
        if key == 'HTTP_CONTENT_LENGTH':
            key = 'CONTENT_LENGTH'
        elif key == 'HTTP_CONTENT_TYPE':
            key = 'CONTENT_TYPE'
        return key

    def _trans_key(self, key):
        """Translate an environ key into a header name, or return None
        when the key does not represent a header."""
        if key == 'CONTENT_TYPE':
            return 'Content-Type'
        elif key == 'CONTENT_LENGTH':
            return 'Content-Length'
        elif key.startswith('HTTP_'):
            return key[5:].replace('_', '-').title()
        else:
            return None

    def __len__(self):
        # Count only the keys that are headers, so that len() agrees
        # with iteration and keys() instead of reporting the size of
        # the whole environ.
        return sum(1 for _ in self)

    def __getitem__(self, item):
        return self.environ[self._trans_name(item)]

    def __setitem__(self, item, value):
        # @@: Should this dictionary be writable at all?
        self.environ[self._trans_name(item)] = value

    def __delitem__(self, item):
        del self.environ[self._trans_name(item)]

    def __iter__(self):
        # Yield the header name for every environ key that maps to one.
        for key in self.environ:
            name = self._trans_key(key)
            if name is not None:
                yield name

    def keys(self):
        return list(iter(self))

    def __contains__(self, item):
        return self._trans_name(item) in self.environ
403
def _cgi_FieldStorage__repr__patch(self):
    """ monkey patch for FieldStorage.__repr__

    Unbelievably, the default __repr__ on FieldStorage reads
    the entire file content instead of being sane about it.
    This is a simple replacement that doesn't do that: when the
    field has a file, only the name and filename are shown;
    otherwise the value is included as well.
    """
    if not self.file:
        details = (self.name, self.filename, self.value)
        return "FieldStorage(%r, %r, %r)" % details
    return "FieldStorage(%r, %r)" % (self.name, self.filename)
416
# Install the replacement __repr__ defined above.  This runs at import
# time and rebinds the attribute on the cgi.FieldStorage class itself,
# so it applies to every FieldStorage instance in the process.
cgi.FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch

# When executed as a script, run the doctests embedded in the docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
422