1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3"""
4Map URL prefixes to WSGI applications.  See ``URLMap``
5"""
6
import os
import re

try:
    # The ``cgi`` module was removed from the standard library in Python 3.13.
    import cgi
except ImportError:
    cgi = None

try:
    # Python 3.3+ (the aliases in ``collections`` were removed in 3.10)
    from collections.abc import MutableMapping as DictMixin
except ImportError:
    try:
        # Python 3
        from collections import MutableMapping as DictMixin
    except ImportError:
        # Python 2
        from UserDict import DictMixin

try:
    # Python 3.2+
    from html import escape as html_escape
except ImportError:
    # Python 2
    from cgi import escape as html_escape

from paste import httpexceptions
18
19__all__ = ['URLMap', 'PathProxyURLMap']
20
def urlmap_factory(loader, global_conf, **local_conf):
    """
    Build a ``URLMap`` from configuration.

    ``not_found_app`` is looked up in ``local_conf`` first (and removed
    from it), then in ``global_conf``.  When it is set, it is resolved
    through ``loader.get_app`` and used as the fallback application.

    Every remaining ``local_conf`` item is treated as a
    ``path expression -> application name`` pair: the key is parsed
    with ``parse_path_expression`` and the value is resolved through
    ``loader.get_app``.
    """
    try:
        fallback = local_conf.pop('not_found_app')
    except KeyError:
        fallback = global_conf.get('not_found_app')
    if fallback:
        fallback = loader.get_app(fallback, global_conf=global_conf)

    mapper = URLMap(not_found_app=fallback)
    for expression, app_name in local_conf.items():
        # Parse the key before loading the app, so a bad expression
        # fails before the loader is asked for anything.
        address = parse_path_expression(expression)
        mapper[address] = loader.get_app(app_name, global_conf=global_conf)
    return mapper
34
def parse_path_expression(path):
    """
    Turn a path expression into an address string for URLMap.

    The expression is a whitespace-separated list of tokens.  The
    keywords ``domain`` and ``port`` each consume the token that
    follows them; any other token is taken as the path.  Examples are
    ``'domain foobar.com port 20 /'`` or simply ``'/foobar'``.

    Returns ``'http://<domain>[:<port>]/<path>'`` when a domain was
    given, otherwise just the path (or ``''`` for an empty expression).

    Raises ``ValueError`` when a keyword has no value, when a domain,
    port or path is given more than once, or when a port is given
    without a domain.
    """
    tokens = iter(path.split())
    nothing = object()  # sentinel: the token stream ran dry
    domain = port = url_path = None
    for token in tokens:
        if token == 'domain':
            value = next(tokens, nothing)
            if value is nothing:
                raise ValueError("'domain' must be followed with a domain name")
            if domain:
                raise ValueError("'domain' given twice")
            domain = value
        elif token == 'port':
            value = next(tokens, nothing)
            if value is nothing:
                raise ValueError("'port' must be followed with a port number")
            if port:
                raise ValueError("'port' given twice")
            port = value
        elif url_path:
            raise ValueError("more than one path given (have %r, got %r)"
                             % (url_path, token))
        else:
            url_path = token

    if port and not domain:
        raise ValueError("If you give a port, you must also give a domain")

    pieces = []
    if domain:
        pieces.append('http://%s' % domain)
        if port:
            pieces.append(':' + port)
        if url_path:
            # A separator is always inserted after the host part, even
            # when the path itself already starts with a slash.
            pieces.append('/')
    if url_path:
        pieces.append(url_path)
    return ''.join(pieces)
75
class URLMap(DictMixin):

    """
    URLMap instances are dictionary-like objects that dispatch to one
    of several applications based on the URL.

    The dictionary keys are URLs to match (like
    ``PATH_INFO.startswith(url)``), and the values are applications to
    dispatch to.  URLs are matched most-specific-first, i.e., longest
    URL first.  The ``SCRIPT_NAME`` and ``PATH_INFO`` environmental
    variables are adjusted to indicate the new context.

    URLs can also include domains, like ``http://blah.com/foo``, or as
    tuples ``('blah.com', '/foo')``.  This will match domain names; without
    the ``http://domain`` or with a domain of ``None`` any domain will be
    matched (so long as no other explicit domain matches).

    Internally every key is stored in normalized ``(domain, path)`` form
    (see ``normalize_url``); iteration and ``keys()`` yield those pairs.
    """

    # Collapses runs of slashes into a single slash.
    norm_url_re = re.compile('//+')
    # Recognizes keys that carry an explicit scheme and domain.
    domain_url_re = re.compile('^(http|https)://')

    def __init__(self, not_found_app=None):
        """
        ``not_found_app`` is the WSGI application called when no URL
        matches; a falsy value selects the built-in ``not_found_app``.
        """
        # List of ((domain, path), app) pairs, kept sorted by sort_apps().
        self.applications = []
        if not not_found_app:
            not_found_app = self.not_found_app
        self.not_found_application = not_found_app

    def __len__(self):
        return len(self.applications)

    def __iter__(self):
        for app_url, app in self.applications:
            yield app_url

    def not_found_app(self, environ, start_response):
        """
        Default fallback application: responds through
        ``httpexceptions.HTTPNotFound`` with a comment listing the
        defined apps (when ``__call__`` recorded the map in the environ)
        and the request's SCRIPT_NAME, PATH_INFO and HTTP_HOST.
        """
        mapper = environ.get('paste.urlmap_object')
        if mapper:
            matches = [p for p, a in mapper.applications]
            extra = 'defined apps: %s' % (
                ',\n  '.join(map(repr, matches)))
        else:
            extra = ''
        for key in ('SCRIPT_NAME', 'PATH_INFO', 'HTTP_HOST'):
            # Any of these may be absent (e.g. no Host header on an
            # HTTP/1.0 request); escaping None would raise.
            value = environ.get(key) or ''
            # quote=False escapes only &, < and >, as cgi.escape did.
            extra += '\n%s: %r' % (key, html_escape(value, quote=False))
        app = httpexceptions.HTTPNotFound(
            environ.get('PATH_INFO', ''),
            comment=html_escape(extra, quote=False)).wsgi_application
        return app(environ, start_response)

    def normalize_url(self, url, trim=True):
        """
        Return ``url`` as a ``(domain, path)`` pair.

        ``url`` may be a ``(domain, path)`` list/tuple, a path starting
        with ``/``, an empty string, or a ``http://`` / ``https://`` URL.
        Runs of slashes in the path are collapsed, and trailing slashes
        are stripped when ``trim`` is true.  ``domain`` is ``None`` for a
        plain path.  For list/tuple input the path part is always
        trimmed and the domain is returned unchanged.
        """
        if isinstance(url, (list, tuple)):
            domain = url[0]
            url = self.normalize_url(url[1])[1]
            return domain, url
        assert (not url or url.startswith('/')
                or self.domain_url_re.search(url)), (
            "URL fragments must start with / or http:// (you gave %r)" % url)
        match = self.domain_url_re.search(url)
        if match:
            url = url[match.end():]
            if '/' in url:
                domain, url = url.split('/', 1)
                url = '/' + url
            else:
                domain, url = url, ''
        else:
            domain = None
        url = self.norm_url_re.sub('/', url)
        if trim:
            url = url.rstrip('/')
        return domain, url

    def sort_apps(self):
        """
        Make sure applications are sorted with longest URLs first.

        Entries with an explicit domain come before domain-less ones,
        so a catch-all entry is only reached when no explicit domain
        matched.
        """
        def key(app_desc):
            (domain, url), app = app_desc
            # ``not domain`` puts empty domains last without relying on a
            # sentinel string.  The url itself is the final tie-breaker,
            # so application objects are never compared.
            return (not domain, domain or '', -len(url), url)
        self.applications.sort(key=key)

    def __setitem__(self, url, app):
        """
        Register ``app`` under ``url``, replacing any existing entry.
        Assigning ``None`` removes the entry if there is one.
        """
        if app is None:
            try:
                del self[url]
            except KeyError:
                pass
            return
        dom_url = self.normalize_url(url)
        if dom_url in self:
            del self[dom_url]
        self.applications.append((dom_url, app))
        self.sort_apps()

    def __getitem__(self, url):
        """
        Return the application registered for exactly ``url`` (after
        normalization); raise ``KeyError`` if there is none.
        """
        dom_url = self.normalize_url(url)
        for app_url, app in self.applications:
            if app_url == dom_url:
                return app
        # Report the normalized pair: indexing the raw key would give
        # single characters for a string and fail on an empty one.
        raise KeyError(
            "No application with the url %r (domain: %r; existing: %s)"
            % (dom_url[1], dom_url[0] or '*', self.applications))

    def __delitem__(self, url):
        """
        Remove the entry registered for ``url``; raise ``KeyError`` if
        there is none.
        """
        url = self.normalize_url(url)
        for index, (app_url, app) in enumerate(self.applications):
            if app_url == url:
                # Delete by position so the app object is never compared.
                del self.applications[index]
                return
        raise KeyError(
            "No application with the url %r" % (url,))

    def keys(self):
        return [app_url for app_url, app in self.applications]

    def __call__(self, environ, start_response):
        """
        WSGI entry point: dispatch to the first registered application
        whose domain and path prefix match the request, moving the
        matched prefix from ``PATH_INFO`` to ``SCRIPT_NAME``.  When
        nothing matches, the map is stored under
        ``environ['paste.urlmap_object']`` and the not-found application
        is called.
        """
        host = (environ.get('HTTP_HOST')
                or environ.get('SERVER_NAME') or '').lower()
        if ':' in host:
            host, port = host.split(':', 1)
        elif environ['wsgi.url_scheme'] == 'http':
            port = '80'
        else:
            port = '443'
        host_port = host + ':' + port
        # WSGI allows PATH_INFO and SCRIPT_NAME to be omitted when empty.
        path_info = environ.get('PATH_INFO') or ''
        path_info = self.normalize_url(path_info, False)[1]
        for (domain, app_url), app in self.applications:
            if domain and domain != host and domain != host_port:
                continue
            if (path_info == app_url
                    or path_info.startswith(app_url + '/')):
                environ['SCRIPT_NAME'] = (
                    environ.get('SCRIPT_NAME', '') + app_url)
                environ['PATH_INFO'] = path_info[len(app_url):]
                return app(environ, start_response)
        environ['paste.urlmap_object'] = self
        return self.not_found_application(environ, start_response)
219
220
class PathProxyURLMap(object):

    """
    This is a wrapper for URLMap that catches any strings that
    are passed in as applications; these strings are treated as
    filenames (relative to `base_path`) and are passed to the
    callable `builder`, which will return an application.

    This is intended for cases when configuration files can be
    treated as applications.

    `base_paste_url` is the URL under which all applications added through
    this wrapper must go.  Use ``""`` if you want this to not
    change incoming URLs.

    Attributes not defined here are looked up on the wrapped map.
    """

    # Types treated as filenames in __setitem__.  ``unicode`` only
    # exists on Python 2; referencing it on Python 3 raises NameError.
    try:
        _string_types = (str, unicode)
    except NameError:
        _string_types = (str,)

    def __init__(self, map, base_paste_url, base_path, builder):
        """
        ``map`` is the wrapped mapping (it must provide
        ``normalize_url``), ``base_paste_url`` the URL prefix for every
        application added through this wrapper, ``base_path`` the
        directory that filenames are joined to, and ``builder`` a
        callable that turns a filename into an application.
        """
        self.map = map
        # Stored as the (domain, path) pair returned by normalize_url.
        self.base_paste_url = self.map.normalize_url(base_paste_url)
        self.base_path = base_path
        self.builder = builder

    def __setitem__(self, url, app):
        """
        Register ``app`` under ``url`` below the base URL.  A string
        ``app`` is first joined to ``base_path`` and passed to
        ``builder`` to obtain the application.
        """
        if isinstance(app, self._string_types):
            app_fn = os.path.join(self.base_path, app)
            app = self.builder(app_fn)
        url = self.map.normalize_url(url)
        # @@: This means http://foo.com/bar will potentially
        # match foo.com, but /base_paste_url/bar, which is unintuitive
        url = (url[0] or self.base_paste_url[0],
               self.base_paste_url[1] + url[1])
        self.map[url] = app

    def __getattr__(self, attr):
        # Only called when normal lookup fails.  If ``map`` itself is
        # not set yet, delegating would recurse without end.
        if attr == 'map':
            raise AttributeError(attr)
        return getattr(self.map, attr)

    # This is really the only settable attribute
    def not_found_application__get(self):
        return self.map.not_found_application
    def not_found_application__set(self, value):
        self.map.not_found_application = value
    not_found_application = property(not_found_application__get,
                                     not_found_application__set)
264