# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""
WSGI applications that parse the URL and dispatch to on-disk resources
"""

import os
import six
import sys
# NOTE(review): ``imp`` is deprecated in Python 3 and absent from recent
# releases; porting ``load_module_from_name`` to ``importlib`` is a separate
# change.
import imp
import mimetypes
try:
    import pkg_resources
except ImportError:
    pkg_resources = None
from paste import request
from paste import fileapp
from paste.util import import_string
from paste import httpexceptions
from .httpheaders import ETAG
from paste.util import converters


class NoDefault(object):
    """Sentinel used where ``None`` cannot mean "argument not supplied"."""
    pass


__all__ = ['URLParser', 'StaticURLParser', 'PkgResourcesParser']


class URLParser(object):

    """
    WSGI middleware

    Application dispatching, based on URL.  An instance of `URLParser` is
    an application that loads and delegates to other applications.  It
    looks for files in its directory that match the first part of
    PATH_INFO; these may have an extension, but are not required to have
    one, in which case the available files are searched to find the
    appropriate file.  If it is ambiguous, a 404 is returned and an error
    logged.

    By default there is a constructor for .py files that loads the module,
    and looks for an attribute ``application``, which is a ready
    application object, or an attribute that matches the module name,
    which is a factory for building applications, and is called with no
    arguments.

    URLParser will also look in __init__.py for special overrides.
    These overrides are:

    ``urlparser_hook(environ)``
        This can modify the environment.  Its return value is ignored,
        and it cannot be used to change the response in any way.  You
        *can* use this, for example, to manipulate SCRIPT_NAME/PATH_INFO
        (try to keep them consistent with the original URL -- but
        consuming PATH_INFO and moving that to SCRIPT_NAME is ok).

    ``urlparser_wrap(environ, start_response, app)``:
        After URLParser finds the application, it calls this function
        (if present).  If this function doesn't call
        ``app(environ, start_response)`` then the application won't be
        called at all!  This can be used to allocate resources (with
        ``try:finally:``) or otherwise filter the output of the
        application.

    ``not_found_hook(environ, start_response)``:
        If no file can be found (*in this directory*) to match the
        request, then this WSGI application will be called.  You can
        use this to change the URL and pass the request back to
        URLParser again, or on to some other application.  This
        doesn't catch all ``404 Not Found`` responses, just missing
        files.

    ``application(environ, start_response)``:
        This basically overrides URLParser completely, and the given
        application is used for all requests.  ``urlparser_wrap`` and
        ``urlparser_hook`` are still called, but the filesystem isn't
        searched in any way.
    """

    # Cache of parsers keyed by (directory, base_python_name); shared by
    # every instance because it lives on the class (see get_parser).
    parsers_by_directory = {}

    # This is lazily initialized
    init_module = NoDefault

    # Extension -> constructor, filled in by register_constructor().
    global_constructors = {}

    def __init__(self, global_conf,
                 directory, base_python_name,
                 index_names=NoDefault,
                 hide_extensions=NoDefault,
                 ignore_extensions=NoDefault,
                 constructors=None,
                 **constructor_conf):
        """
        Create a URLParser object that looks at `directory`.
        `base_python_name` is the package that this directory
        represents, thus any Python modules in this directory will
        be given names under this package.

        `index_names`, `hide_extensions` and `ignore_extensions` fall
        back to `global_conf` and then to built-in defaults when left
        as ``NoDefault``.  `constructors` is a dict of per-instance
        constructors layered over the globally registered ones.  Extra
        keyword arguments must be named ``'constructor .ext'``; string
        values are resolved with ``import_string.eval_import``.

        Raises ``ValueError`` for any other extra keyword argument.
        """
        if global_conf:
            import warnings
            warnings.warn(
                'The global_conf argument to URLParser is deprecated; '
                'either pass in None or {}, or use make_url_parser',
                DeprecationWarning)
        else:
            global_conf = {}
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        self.base_python_name = base_python_name
        # This logic here should be deprecated since it is in
        # make_url_parser
        if index_names is NoDefault:
            index_names = global_conf.get(
                'index_names', ('index', 'Index', 'main', 'Main'))
        self.index_names = converters.aslist(index_names)
        if hide_extensions is NoDefault:
            hide_extensions = global_conf.get(
                'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
        self.hide_extensions = converters.aslist(hide_extensions)
        if ignore_extensions is NoDefault:
            ignore_extensions = global_conf.get(
                'ignore_extensions', ())
        self.ignore_extensions = converters.aslist(ignore_extensions)
        self.constructors = self.global_constructors.copy()
        if constructors:
            self.constructors.update(constructors)
        # @@: Should we also check the global options for constructors?
        for name, value in constructor_conf.items():
            if not name.startswith('constructor '):
                raise ValueError(
                    "Only extra configuration keys allowed are "
                    "'constructor .ext = import_expr'; you gave %r "
                    "(=%r)" % (name, value))
            ext = name[len('constructor '):].strip()
            # six.string_types covers str/unicode on Python 2 and str on
            # Python 3 (the bare name ``unicode`` does not exist there).
            if isinstance(value, six.string_types):
                value = import_string.eval_import(value)
            self.constructors[ext] = value

    def __call__(self, environ, start_response):
        """
        WSGI entry point: locate the application for the next path
        segment and delegate to it, honouring the ``__init__.py`` hooks
        described in the class docstring.
        """
        environ['paste.urlparser.base_python_name'] = self.base_python_name
        if self.init_module is NoDefault:
            self.init_module = self.find_init_module(environ)
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if (self.init_module
                and getattr(self.init_module, 'urlparser_hook', None)):
            self.init_module.urlparser_hook(environ)
        # Remember the pre-dispatch values so not_found_hook sees the
        # request as it arrived; find_application mutates both keys.
        orig_path_info = environ['PATH_INFO']
        # SCRIPT_NAME may legitimately be absent when it is empty.
        orig_script_name = environ.get('SCRIPT_NAME', '')
        application, filename = self.find_application(environ)
        if not application:
            if (self.init_module
                    and getattr(self.init_module, 'not_found_hook', None)
                    and environ.get('paste.urlparser.not_found_parser') is not self):
                not_found_hook = self.init_module.not_found_hook
                # Marker that stops the hook from being re-entered for
                # this same parser.
                environ['paste.urlparser.not_found_parser'] = self
                environ['PATH_INFO'] = orig_path_info
                environ['SCRIPT_NAME'] = orig_script_name
                return not_found_hook(environ, start_response)
            if filename is None:
                name, rest_of_path = request.path_info_split(environ['PATH_INFO'])
                if not name:
                    name = 'one of %s' % ', '.join(
                        self.index_names or
                        ['(no index_names defined)'])

                return self.not_found(
                    environ, start_response,
                    'Tried to load %s from directory %s'
                    % (name, self.directory))
            else:
                environ['wsgi.errors'].write(
                    'Found resource %s, but could not construct application\n'
                    % filename)
                return self.not_found(
                    environ, start_response,
                    'Tried to load %s from directory %s'
                    % (filename, self.directory))
        if (self.init_module
                and getattr(self.init_module, 'urlparser_wrap', None)):
            return self.init_module.urlparser_wrap(
                environ, start_response, application)
        else:
            return application(environ, start_response)

    def find_application(self, environ):
        """
        Return ``(application, filename)`` for the request.

        ``application`` is ``None`` when nothing could be found or
        constructed; ``filename`` is ``None`` when no file matched (or
        when the ``__init__.py`` ``application`` override was used).
        Moves the consumed path segment from PATH_INFO to SCRIPT_NAME.
        """
        script_name = environ.get('SCRIPT_NAME', '')
        if (self.init_module
                and getattr(self.init_module, 'application', None)
                and environ.get('paste.urlparser.init_application') != script_name):
            # Record the SCRIPT_NAME so the override is applied only once
            # at this URL level.
            environ['paste.urlparser.init_application'] = script_name
            return self.init_module.application, None
        name, rest_of_path = request.path_info_split(environ['PATH_INFO'])
        environ['PATH_INFO'] = rest_of_path
        if name is not None:
            environ['SCRIPT_NAME'] = script_name + '/' + name
        if not name:
            # No segment: try each configured index name in order.
            for index_name in self.index_names:
                filename = self.find_file(environ, index_name)
                if filename:
                    break
            else:
                # None of the index files found
                filename = None
        else:
            filename = self.find_file(environ, name)
        if filename is None:
            return None, filename
        else:
            return self.get_application(environ, filename), filename

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with a 404, attaching `debug_message` as the comment."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment=debug_message)
        return exc.wsgi_application(environ, start_response)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def find_file(self, environ, base_filename):
        """
        Return the full path of the entry in ``self.directory`` matching
        `base_filename`, or ``None``.

        An entry matches when its full name equals `base_filename`, or
        when its name without extension does and the extension is not in
        ``ignore_extensions``.  Entries whose extension is in
        ``hide_extensions`` never match.  When several entries match, an
        exact-name match wins; otherwise the ambiguity is logged to
        ``wsgi.errors`` and ``None`` is returned.
        """
        possible = []
        exact_match = None
        for filename in os.listdir(self.directory):
            base, ext = os.path.splitext(filename)
            full_filename = os.path.join(self.directory, filename)
            if ext in self.hide_extensions or not base:
                continue
            if filename == base_filename:
                exact_match = full_filename
                possible.append(full_filename)
                continue
            if ext in self.ignore_extensions:
                continue
            if base == base_filename:
                possible.append(full_filename)
        if not possible:
            #environ['wsgi.errors'].write(
            #    'No file found matching %r in %s\n'
            #    % (base_filename, self.directory))
            return None
        if len(possible) > 1:
            # If there is an exact match, this isn't 'ambiguous'
            # per se; it might mean foo.gif and foo.gif.back for
            # instance
            if exact_match is not None:
                return exact_match
            environ['wsgi.errors'].write(
                'Ambiguous URL: %s; matches files %s\n'
                % (request.construct_url(environ),
                   ', '.join(possible)))
            return None
        return possible[0]

    def get_application(self, environ, filename):
        """
        Build the WSGI application for `filename` using the constructor
        registered for its extension (``'dir'`` for directories, ``'*'``
        as fallback).  Returns ``None`` when there is no constructor or
        the constructor returns ``None``.
        """
        if os.path.isdir(filename):
            t = 'dir'
        else:
            t = os.path.splitext(filename)[1]
        constructor = self.constructors.get(t, self.constructors.get('*'))
        if constructor is None:
            #environ['wsgi.errors'].write(
            #    'No constructor found for %s\n' % t)
            return constructor
        app = constructor(self, environ, filename)
        if app is None:
            #environ['wsgi.errors'].write(
            #    'Constructor %s return None for %s\n' %
            #    (constructor, filename))
            pass
        return app

    @classmethod
    def register_constructor(cls, extension, constructor):
        """
        Register a function as a constructor.  Registered constructors
        apply to all instances of `URLParser`.

        The extension should have a leading ``.``, or the special
        extensions ``dir`` (for directories) and ``*`` (a catch-all).

        `constructor` must be a callable that takes three arguments:
        the parser, ``environ`` and ``filename``, and returns a WSGI
        application (or ``None``).

        Registering the same extension twice fails an assertion.
        """
        d = cls.global_constructors
        assert extension not in d, (
            "A constructor already exists for the extension %r (%r) "
            "when attemption to register constructor %r"
            % (extension, d[extension], constructor))
        d[extension] = constructor

    def get_parser(self, directory, base_python_name):
        """
        Get a parser for the given directory, or create one if
        necessary.  This way parsers can be cached and reused.

        # @@: settings are inherited from the first caller
        """
        try:
            return self.parsers_by_directory[(directory, base_python_name)]
        except KeyError:
            parser = self.__class__(
                {},
                directory, base_python_name,
                index_names=self.index_names,
                hide_extensions=self.hide_extensions,
                ignore_extensions=self.ignore_extensions,
                constructors=self.constructors)
            self.parsers_by_directory[(directory, base_python_name)] = parser
            return parser

    def find_init_module(self, environ):
        """Load this directory's ``__init__.py``; ``None`` if it is absent."""
        filename = os.path.join(self.directory, '__init__.py')
        if not os.path.exists(filename):
            return None
        return load_module(environ, filename)

    def __repr__(self):
        return '<%s directory=%r; module=%s at %s>' % (
            self.__class__.__name__,
            self.directory,
            self.base_python_name,
            hex(abs(id(self))))


def make_directory(parser, environ, filename):
    """Constructor for directories: a (cached) parser for the subdirectory."""
    base_python_name = environ['paste.urlparser.base_python_name']
    if base_python_name:
        base_python_name += "." + os.path.basename(filename)
    else:
        base_python_name = os.path.basename(filename)
    return parser.get_parser(filename, base_python_name)

URLParser.register_constructor('dir', make_directory)


def make_unknown(parser, environ, filename):
    """Catch-all constructor: serve the file with ``fileapp.FileApp``."""
    return fileapp.FileApp(filename)

URLParser.register_constructor('*', make_unknown)


def load_module(environ, filename):
    """
    Load `filename` as a module named under the package recorded in
    ``environ['paste.urlparser.base_python_name']``.
    """
    base_python_name = environ['paste.urlparser.base_python_name']
    module_name = os.path.splitext(os.path.basename(filename))[0]
    if base_python_name:
        module_name = base_python_name + '.' + module_name
    return load_module_from_name(environ, filename, module_name,
                                 environ['wsgi.errors'])


def load_module_from_name(environ, filename, module_name, errors):
    """
    Import and return the module `module_name` found next to `filename`.

    Returns the cached entry from ``sys.modules`` when present.  Creates
    an ``__init__.py`` beside `filename` if there is none; if that file
    cannot be written the problem is reported on `errors` and ``None`` is
    returned.  Parent packages of a dotted name are loaded first.
    """
    if module_name in sys.modules:
        return sys.modules[module_name]
    directory = os.path.dirname(filename)
    init_filename = os.path.join(directory, '__init__.py')
    if not os.path.exists(init_filename):
        try:
            with open(init_filename, 'w') as f:
                f.write('#\n')
        except (OSError, IOError) as e:
            errors.write(
                'Cannot write __init__.py file into directory %s (%s)\n'
                % (directory, e))
            return None
    if '.' in module_name:
        parent_name, base_name = module_name.rsplit('.', 1)
        # Called for its side effect of importing the parent package.
        load_module_from_name(environ, directory, parent_name, errors)
    else:
        base_name = module_name
    fp = None
    try:
        fp, pathname, stuff = imp.find_module(base_name, [directory])
        module = imp.load_module(module_name, fp, pathname, stuff)
    finally:
        if fp is not None:
            fp.close()
    return module


def make_py(parser, environ, filename):
    """
    Constructor for ``.py`` files.

    Uses the module's ``application`` attribute when it is set, else the
    attribute named like the module (its ``wsgi_application`` if it has
    one, otherwise the result of calling it with no arguments).  Returns
    ``None``, after logging to ``wsgi.errors``, when neither exists.
    """
    module = load_module(environ, filename)
    if not module:
        return None
    if hasattr(module, 'application') and module.application:
        return getattr(module.application, 'wsgi_application', module.application)
    base_name = module.__name__.split('.')[-1]
    if hasattr(module, base_name):
        obj = getattr(module, base_name)
        if hasattr(obj, 'wsgi_application'):
            return obj.wsgi_application
        else:
            # @@: Old behavior; should probably be deprecated eventually:
            return getattr(module, base_name)()
    environ['wsgi.errors'].write(
        "Cound not find application or %s in %s\n"
        % (base_name, module))
    return None

URLParser.register_constructor('.py', make_py)


class StaticURLParser(object):
    """
    Like ``URLParser`` but only serves static files.

    ``cache_max_age``:
      integer specifies Cache-Control max_age in seconds
    """
    # @@: Should URLParser subclass from this?

    def __init__(self, directory, root_directory=None,
                 cache_max_age=None):
        """
        Serve files from `directory`, refusing anything that resolves
        outside `root_directory` (defaults to `directory`).
        """
        self.directory = self.normpath(directory)
        self.root_directory = self.normpath(root_directory or directory)
        self.cache_max_age = cache_max_age

    @staticmethod
    def normpath(path):
        """Return `path` made absolute and case-normalized."""
        return os.path.normcase(os.path.abspath(path))

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the file named by the next PATH_INFO
        segment, recursing into subdirectories and answering 404 for
        missing or out-of-bounds paths.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        full = self.normpath(os.path.join(self.directory, filename))
        # Compare on a path-component boundary: a bare prefix test would
        # accept a sibling such as "/srv/www-private" for root "/srv/www".
        root = self.root_directory
        if full != root and not full.startswith(root.rstrip(os.sep) + os.sep):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not os.path.exists(full):
            return self.not_found(environ, start_response)
        if os.path.isdir(full):
            # @@: Cache?
            return self.__class__(full, root_directory=self.root_directory,
                                  cache_max_age=self.cache_max_age)(environ,
                                                                   start_response)
        if environ.get('PATH_INFO') and environ.get('PATH_INFO') != '/':
            return self.error_extra_path(environ, start_response)
        if_none_match = environ.get('HTTP_IF_NONE_MATCH')
        if if_none_match:
            mytime = os.stat(full).st_mtime
            if str(mytime) == if_none_match:
                headers = []
                ## FIXME: probably should be
                ## ETAG.update(headers, '"%s"' % mytime)
                ETAG.update(headers, mytime)
                start_response('304 Not Modified', headers)
                # WSGI response bodies must be byte strings.
                return [b'']  # empty body

        fa = self.make_app(full)
        if self.cache_max_age:
            fa.cache_control(max_age=self.cache_max_age)
        return fa(environ, start_response)

    def make_app(self, filename):
        """Return the WSGI application that serves `filename`."""
        return fileapp.FileApp(filename)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with a 404 whose comment records where we looked."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def error_extra_path(self, environ, start_response):
        """Respond with a 404 for path segments trailing a file name."""
        exc = httpexceptions.HTTPNotFound(
            'The trailing path %r is not allowed' % environ['PATH_INFO'])
        return exc.wsgi_application(environ, start_response)

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.directory)


def make_static(global_conf, document_root, cache_max_age=None):
    """
    Return a WSGI application that serves a directory (configured
    with document_root)

    cache_max_age - integer specifies CACHE_CONTROL max_age in seconds
    """
    if cache_max_age is not None:
        cache_max_age = int(cache_max_age)
    return StaticURLParser(
        document_root, cache_max_age=cache_max_age)


class PkgResourcesParser(StaticURLParser):
    """
    Like ``StaticURLParser`` but serves resources out of a
    ``pkg_resources`` distribution instead of a filesystem directory.
    """

    def __init__(self, egg_or_spec, resource_name, manager=None, root_resource=None):
        """
        `egg_or_spec` is a distribution object or a requirement string
        resolved with ``pkg_resources.get_distribution``.
        `resource_name` is the resource path to serve from, and
        `root_resource` (defaults to `resource_name`) the path that
        requests may not leave.

        Raises ``NotImplementedError`` when ``pkg_resources`` is missing.
        """
        if pkg_resources is None:
            raise NotImplementedError("This class requires pkg_resources.")
        if isinstance(egg_or_spec, (six.binary_type, six.text_type)):
            self.egg = pkg_resources.get_distribution(egg_or_spec)
        else:
            self.egg = egg_or_spec
        self.resource_name = resource_name
        if manager is None:
            manager = pkg_resources.ResourceManager()
        self.manager = manager
        if root_resource is None:
            root_resource = resource_name
        self.root_resource = os.path.normpath(root_resource)

    def __repr__(self):
        return '<%s for %s:%r>' % (
            self.__class__.__name__,
            self.egg.project_name,
            self.resource_name)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: stream the resource named by the next PATH_INFO
        segment, recursing into resource directories.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        resource = os.path.normcase(os.path.normpath(
            self.resource_name + '/' + filename))
        # ``resource`` is case-normalized, so normalize the root the same
        # way before comparing.
        if (self.root_resource is not None
                and not resource.startswith(
                    os.path.normcase(self.root_resource))):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not self.egg.has_resource(resource):
            return self.not_found(environ, start_response)
        if self.egg.resource_isdir(resource):
            # @@: Cache?
            child_root = self.root_resource or self.resource_name
            return self.__class__(self.egg, resource, self.manager,
                                  root_resource=child_root)(environ, start_response)
        if environ.get('PATH_INFO') and environ.get('PATH_INFO') != '/':
            return self.error_extra_path(environ, start_response)

        content_type, encoding = mimetypes.guess_type(resource)
        if not content_type:
            content_type = 'application/octet-stream'
        # @@: I don't know what to do with the encoding.
        try:
            stream = self.egg.get_resource_stream(self.manager, resource)
        except (IOError, OSError) as e:
            exc = httpexceptions.HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(environ, start_response)
        start_response('200 OK',
                       [('content-type', content_type)])
        return fileapp._FileIter(stream)

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with a 404 whose comment records the egg and resource."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in egg:%s#%r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.egg, self.resource_name, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)


def make_pkg_resources(global_conf, egg, resource_name=''):
    """
    A static file parser that loads data from an egg using
    ``pkg_resources``.  Takes a configuration value ``egg``, which is
    an egg spec, and a base ``resource_name`` (default empty string)
    which is the path in the egg that this starts at.
    """
    if pkg_resources is None:
        raise NotImplementedError("This function requires pkg_resources.")
    return PkgResourcesParser(egg, resource_name)


def make_url_parser(global_conf, directory, base_python_name,
                    index_names=None, hide_extensions=None,
                    ignore_extensions=None,
                    **constructor_conf):
    """
    Create a URLParser application that looks in ``directory``, which
    should be the directory for the Python package named in
    ``base_python_name``.  ``index_names`` are used when viewing the
    directory (like ``'index'`` for ``'index.html'``).
    ``hide_extensions`` are extensions that are not viewable (like
    ``'.pyc'``) and ``ignore_extensions`` are viewable but only if an
    explicit extension is given.
    """
    if index_names is None:
        index_names = global_conf.get(
            'index_names', ('index', 'Index', 'main', 'Main'))
    index_names = converters.aslist(index_names)

    if hide_extensions is None:
        # Extensions are compared against os.path.splitext() output, so
        # each needs its leading dot; same defaults as URLParser.__init__.
        hide_extensions = global_conf.get(
            'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
    hide_extensions = converters.aslist(hide_extensions)

    if ignore_extensions is None:
        ignore_extensions = global_conf.get(
            'ignore_extensions', ())
    ignore_extensions = converters.aslist(ignore_extensions)
    # There's no real way to set constructors currently...

    return URLParser({}, directory, base_python_name,
                     index_names=index_names,
                     hide_extensions=hide_extensions,
                     ignore_extensions=ignore_extensions,
                     **constructor_conf)