# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
# (c) 2005 Ian Bicking and contributors
# This module is part of the Python Paste Project and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
This module provides helper routines which work directly on a WSGI
environment to solve common requirements.

   * get_cookies(environ)
   * parse_querystring(environ)
   * parse_formvars(environ, include_get_vars=True)
   * construct_url(environ, with_query_string=True, with_path_info=True,
                   script_name=None, path_info=None, querystring=None)
   * path_info_split(path_info)
   * path_info_pop(environ)
   * resolve_relative_url(url, environ)

"""
# NOTE: the ``cgi`` module was removed from the standard library in
# Python 3.13 (PEP 594); this module still depends on it.
import cgi
from six.moves.urllib import parse as urlparse
from six.moves.urllib.parse import quote, parse_qsl
try:
    # Python 3
    from http.cookies import SimpleCookie, CookieError
except ImportError:
    # Python 2
    from Cookie import SimpleCookie, CookieError

try:
    # Python 2
    from UserDict import DictMixin
except ImportError:
    # Python 3: the ABC aliases in ``collections`` itself were removed in
    # Python 3.10, so the class must come from ``collections.abc``.
    from collections.abc import MutableMapping as DictMixin
import six

from paste.util.multidict import MultiDict

__all__ = ['get_cookies', 'get_cookie_dict', 'parse_querystring',
           'parse_formvars', 'construct_url', 'path_info_split',
           'path_info_pop', 'resolve_relative_url', 'EnvironHeaders']


def get_cookies(environ):
    """
    Gets a cookie object (which is a dictionary-like object) from the
    request environment; caches this value in case get_cookies is
    called again for the same request.

    The cache is stored in ``environ['paste.cookies']`` as a
    ``(cookies, header)`` tuple and is only reused while the
    ``HTTP_COOKIE`` header still equals the cached ``header``.  A
    ``CookieError`` raised while loading the header is ignored and the
    ``SimpleCookie`` object is returned in whatever state it is in.
    """
    header = environ.get('HTTP_COOKIE', '')
    if 'paste.cookies' in environ:
        cookies, check_header = environ['paste.cookies']
        if check_header == header:
            return cookies
    cookies = SimpleCookie()
    try:
        cookies.load(header)
    except CookieError:
        # Deliberately best-effort: a malformed Cookie header must not
        # break the request.
        pass
    environ['paste.cookies'] = (cookies, header)
    return cookies


def get_cookie_dict(environ):
    """Return a *plain* dictionary of cookies as found in the request.

    Unlike ``get_cookies`` this returns a dictionary, not a
    ``SimpleCookie`` object.  For incoming cookies a dictionary fully
    represents the information.  Like ``get_cookies`` this caches and
    checks the cache (under ``environ['paste.cookies.dict']``).

    When there is no ``HTTP_COOKIE`` header (or it is empty) a new empty
    dictionary is returned and nothing is cached.
    """
    header = environ.get('HTTP_COOKIE')
    if not header:
        return {}
    if 'paste.cookies.dict' in environ:
        cookies, check_header = environ['paste.cookies.dict']
        if check_header == header:
            return cookies
    cookies = SimpleCookie()
    try:
        cookies.load(header)
    except CookieError:
        # Best-effort, same as get_cookies().
        pass
    result = {}
    for name in cookies:
        result[name] = cookies[name].value
    environ['paste.cookies.dict'] = (result, header)
    return result


def parse_querystring(environ):
    """
    Parses a query string into a list like ``[(name, value)]``.
    Caches this value in case parse_querystring is called again
    for the same request.

    You can pass the result to ``dict()``, but be aware that keys that
    appear multiple times will be lost (only the last value will be
    preserved).

    Blank values are kept and parsing is non-strict.  An absent or
    empty ``QUERY_STRING`` yields a new empty list (not cached).
    """
    source = environ.get('QUERY_STRING', '')
    if not source:
        return []
    if 'paste.parsed_querystring' in environ:
        parsed, check_source = environ['paste.parsed_querystring']
        if check_source == source:
            return parsed
    parsed = parse_qsl(source, keep_blank_values=True,
                       strict_parsing=False)
    environ['paste.parsed_querystring'] = (parsed, source)
    return parsed


def parse_dict_querystring(environ):
    """Parses a query string like parse_querystring, but returns a MultiDict

    Caches this value in case parse_dict_querystring is called again
    for the same request.

    Example::

        >>> environ = {'QUERY_STRING': 'day=Monday&user=fred&user=jane'}
        >>> parsed = parse_dict_querystring(environ)

        >>> parsed['day']
        'Monday'
        >>> parsed['user']
        'fred'
        >>> parsed.getall('user')
        ['fred', 'jane']

    """
    source = environ.get('QUERY_STRING', '')
    if not source:
        return MultiDict()
    if 'paste.parsed_dict_querystring' in environ:
        parsed, check_source = environ['paste.parsed_dict_querystring']
        if check_source == source:
            return parsed
    parsed = parse_qsl(source, keep_blank_values=True,
                       strict_parsing=False)
    multi = MultiDict(parsed)
    environ['paste.parsed_dict_querystring'] = (multi, source)
    return multi


def parse_formvars(environ, include_get_vars=True):
    """Parses the request, returning a MultiDict of form variables.

    If ``include_get_vars`` is true then GET (query string) variables
    will also be folded into the MultiDict.

    All values should be strings, except for file uploads which are
    left as ``FieldStorage`` instances.

    If the request was not a normal form request (e.g., a POST with an
    XML body) then ``environ['wsgi.input']`` won't be read.

    The result is cached in ``environ['paste.parsed_formvars']`` keyed on
    the ``wsgi.input`` object.  ``environ`` is temporarily modified while
    ``cgi.FieldStorage`` runs; the modified keys are restored even if
    parsing raises.  As a lasting side effect ``CONTENT_LENGTH`` is set
    to ``'0'`` when it was missing or empty, and ``QUERY_STRING`` is set
    to ``''`` when it was missing.
    """
    source = environ['wsgi.input']
    if 'paste.parsed_formvars' in environ:
        parsed, check_source = environ['paste.parsed_formvars']
        if check_source == source:
            # NOTE(review): this updates the cached MultiDict in place, so
            # GET variables stay in the cache for later callers that pass
            # include_get_vars=False.  Kept as-is for compatibility.
            if include_get_vars:
                parsed.update(parse_querystring(environ))
            return parsed
    # @@: Shouldn't bother FieldStorage parsing during GET/HEAD and
    # fake_out_cgi requests
    content_type = environ.get('CONTENT_TYPE', '').lower()
    if ';' in content_type:
        content_type = content_type.split(';', 1)[0]
    # Whitespace is allowed around the media type ("type ; param"), so
    # strip it before comparing.
    content_type = content_type.strip()
    fake_out_cgi = content_type not in ('',
                                        'application/x-www-form-urlencoded',
                                        'multipart/form-data')
    # FieldStorage assumes a default CONTENT_LENGTH of -1, but a
    # default of 0 is better:
    if not environ.get('CONTENT_LENGTH'):
        environ['CONTENT_LENGTH'] = '0'
    # Prevent FieldStorage from parsing QUERY_STRING during GET/HEAD
    # requests
    old_query_string = environ.get('QUERY_STRING', '')
    environ['QUERY_STRING'] = ''
    if fake_out_cgi:
        # Not a form body: hand FieldStorage an empty stream so the real
        # wsgi.input is left unread.
        body = six.BytesIO(b'')
        old_content_type = environ.get('CONTENT_TYPE')
        old_content_length = environ.get('CONTENT_LENGTH')
        environ['CONTENT_LENGTH'] = '0'
        environ['CONTENT_TYPE'] = ''
    else:
        body = source
    try:
        fs = cgi.FieldStorage(fp=body,
                              environ=environ,
                              keep_blank_values=1)
    finally:
        # Always undo the temporary environ changes, even when
        # FieldStorage raises on a malformed body.
        environ['QUERY_STRING'] = old_query_string
        if fake_out_cgi:
            environ['CONTENT_TYPE'] = old_content_type
            environ['CONTENT_LENGTH'] = old_content_length
    formvars = MultiDict()
    if isinstance(fs.value, list):
        for name in fs.keys():
            values = fs[name]
            if not isinstance(values, list):
                values = [values]
            for value in values:
                # Plain fields are reduced to their string value; file
                # uploads stay as FieldStorage instances.
                if not value.filename:
                    value = value.value
                formvars.add(name, value)
    environ['paste.parsed_formvars'] = (formvars, source)
    if include_get_vars:
        formvars.update(parse_querystring(environ))
    return formvars


def construct_url(environ, with_query_string=True, with_path_info=True,
                  script_name=None, path_info=None, querystring=None):
    """Reconstructs the URL from the WSGI environment.

    You may override SCRIPT_NAME, PATH_INFO, and QUERYSTRING with
    the keyword arguments.

    The host comes from ``HTTP_HOST`` when present, otherwise from
    ``SERVER_NAME``/``SERVER_PORT``.  The port is omitted when it is the
    default for the scheme (443 for https, 80 for http).  The script
    name and path info are URL-quoted; the query string is appended
    verbatim.
    """
    scheme = environ['wsgi.url_scheme']
    url = scheme + '://'

    if environ.get('HTTP_HOST'):
        host = environ['HTTP_HOST']
        port = None
        if ':' in host:
            host, port = host.split(':', 1)
            if scheme == 'https':
                if port == '443':
                    port = None
            elif scheme == 'http':
                if port == '80':
                    port = None
        url += host
        if port:
            url += ':%s' % port
    else:
        url += environ['SERVER_NAME']
        if scheme == 'https':
            if environ['SERVER_PORT'] != '443':
                url += ':' + environ['SERVER_PORT']
        else:
            if environ['SERVER_PORT'] != '80':
                url += ':' + environ['SERVER_PORT']

    if script_name is None:
        url += quote(environ.get('SCRIPT_NAME', ''))
    else:
        url += quote(script_name)
    if with_path_info:
        if path_info is None:
            url += quote(environ.get('PATH_INFO', ''))
        else:
            url += quote(path_info)
    if with_query_string:
        if querystring is None:
            if environ.get('QUERY_STRING'):
                url += '?' + environ['QUERY_STRING']
        elif querystring:
            url += '?' + querystring
    return url


def resolve_relative_url(url, environ):
    """
    Resolve the given relative URL as being relative to the
    location represented by the environment.  This can be used
    for redirecting to a relative path.  Note: if url is already
    absolute, this function will (intentionally) have no effect
    on it.

    """
    cur_url = construct_url(environ, with_query_string=False)
    return urlparse.urljoin(cur_url, url)


def path_info_split(path_info):
    """
    Splits off the first segment of the path.  Returns (first_part,
    rest_of_path).  first_part can be None (if PATH_INFO is empty), ''
    (if PATH_INFO is '/'), or a name without any /'s.  rest_of_path
    can be '' or a string starting with /.

    """
    if not path_info:
        return None, ''
    assert path_info.startswith('/'), (
        "PATH_INFO should start with /: %r" % path_info)
    path_info = path_info.lstrip('/')
    if '/' in path_info:
        first, rest = path_info.split('/', 1)
        return first, '/' + rest
    else:
        return path_info, ''


def path_info_pop(environ):
    """
    'Pops' off the next segment of PATH_INFO, pushing it onto
    SCRIPT_NAME, and returning that segment.

    ``environ`` is modified in place.  ``SCRIPT_NAME`` must already be
    present whenever ``PATH_INFO`` is non-empty.

    For instance::

        >>> def call_it(script_name, path_info):
        ...     env = {'SCRIPT_NAME': script_name, 'PATH_INFO': path_info}
        ...     result = path_info_pop(env)
        ...     print('SCRIPT_NAME=%r; PATH_INFO=%r; returns=%r' % (
        ...         env['SCRIPT_NAME'], env['PATH_INFO'], result))
        >>> call_it('/foo', '/bar')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns='bar'
        >>> call_it('/foo/bar', '')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns=None
        >>> call_it('/foo/bar', '/')
        SCRIPT_NAME='/foo/bar/'; PATH_INFO=''; returns=''
        >>> call_it('', '/1/2/3')
        SCRIPT_NAME='/1'; PATH_INFO='/2/3'; returns='1'
        >>> call_it('', '//1/2')
        SCRIPT_NAME='//1'; PATH_INFO='/2'; returns='1'

    """
    path = environ.get('PATH_INFO', '')
    if not path:
        return None
    # Move every leading slash over to SCRIPT_NAME one at a time.
    while path.startswith('/'):
        environ['SCRIPT_NAME'] += '/'
        path = path[1:]
    if '/' not in path:
        environ['SCRIPT_NAME'] += path
        environ['PATH_INFO'] = ''
        return path
    else:
        segment, path = path.split('/', 1)
        environ['PATH_INFO'] = '/' + path
        environ['SCRIPT_NAME'] += segment
        return segment


# CGI variables whose header name cannot be derived by stripping an
# ``HTTP_`` prefix.
_parse_headers_special = {
    # This is a Zope convention, but we'll allow it here:
    'HTTP_CGI_AUTHORIZATION': 'Authorization',
    'CONTENT_LENGTH': 'Content-Length',
    'CONTENT_TYPE': 'Content-Type',
    }


def parse_headers(environ):
    """
    Parse the headers in the environment (like ``HTTP_HOST``) and
    yield a sequence of those (header_name, value) tuples.

    Keys listed in ``_parse_headers_special`` are mapped explicitly;
    other ``HTTP_*`` keys are converted to ``Title-Case`` header names.
    All remaining keys are skipped.
    """
    # @@: Maybe should parse out comma-separated headers?
    # six.iteritems keeps this working on both Python 2 and Python 3
    # (plain dicts have no .iteritems() on Python 3).
    for cgi_var, value in six.iteritems(environ):
        if cgi_var in _parse_headers_special:
            yield _parse_headers_special[cgi_var], value
        elif cgi_var.startswith('HTTP_'):
            yield cgi_var[5:].title().replace('_', '-'), value


class EnvironHeaders(DictMixin):
    """An object that represents the headers as present in a
    WSGI environment.

    This object is a wrapper (with no internal state) for a WSGI
    request object, representing the CGI-style HTTP_* keys as a
    dictionary.  Because a CGI environment can only hold one value for
    each key, this dictionary is single-valued (unlike outgoing
    headers).
    """

    def __init__(self, environ):
        # The wrapped WSGI environ; reads and writes go straight to it.
        self.environ = environ

    def _trans_name(self, name):
        """Translate a header name (``Content-Type``) to its environ key."""
        key = 'HTTP_'+name.replace('-', '_').upper()
        # These two headers are stored without the HTTP_ prefix.
        if key == 'HTTP_CONTENT_LENGTH':
            key = 'CONTENT_LENGTH'
        elif key == 'HTTP_CONTENT_TYPE':
            key = 'CONTENT_TYPE'
        return key

    def _trans_key(self, key):
        """Translate an environ key to a header name.

        Returns None when the key does not represent a header.
        """
        if key == 'CONTENT_TYPE':
            return 'Content-Type'
        elif key == 'CONTENT_LENGTH':
            return 'Content-Length'
        elif key.startswith('HTTP_'):
            return key[5:].replace('_', '-').title()
        else:
            return None

    def __len__(self):
        # Count only the keys that are headers, so that len() agrees with
        # iteration and keys() rather than with the whole environ.
        return sum(1 for _ in self)

    def __getitem__(self, item):
        return self.environ[self._trans_name(item)]

    def __setitem__(self, item, value):
        # @@: Should this dictionary be writable at all?
        self.environ[self._trans_name(item)] = value

    def __delitem__(self, item):
        del self.environ[self._trans_name(item)]

    def __iter__(self):
        for key in self.environ:
            name = self._trans_key(key)
            if name is not None:
                yield name

    def keys(self):
        return list(iter(self))

    def __contains__(self, item):
        return self._trans_name(item) in self.environ


def _cgi_FieldStorage__repr__patch(self):
    """ monkey patch for FieldStorage.__repr__

    Unbelievably, the default __repr__ on FieldStorage reads
    the entire file content instead of being sane about it.
    This is a simple replacement that doesn't do that
    """
    if self.file:
        return "FieldStorage(%r, %r)" % (
            self.name, self.filename)
    return "FieldStorage(%r, %r, %r)" % (
        self.name, self.filename, self.value)


# Installed at import time; affects every FieldStorage in the process.
cgi.FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch

if __name__ == '__main__':
    import doctest
    doctest.testmod()