1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
- RFC1808: the "relative URL" spec (authoritative status)
- RFC1738: the "URL standard" (authoritative status)
- RFC1630: the "URI spec" (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31import re
32
33from urlparse import urljoin as basejoin
34
35__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
36           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
37           "urlencode", "url2pathname", "pathname2url", "splittag",
38           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
39           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
40           "splitnport", "splitquery", "splitattr", "splitvalue",
41           "getproxies"]
42
43__version__ = '1.17'    # XXX This version is not always updated :-(
44
45MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
46
47# Helper for non-unix systems
48if os.name == 'nt':
49    from nturl2path import url2pathname, pathname2url
50elif os.name == 'riscos':
51    from rourl2path import url2pathname, pathname2url
52else:
53    def url2pathname(pathname):
54        """OS-specific conversion from a relative URL of the 'file' scheme
55        to a file system path; not recommended for general use."""
56        return unquote(pathname)
57
58    def pathname2url(pathname):
59        """OS-specific conversion from a file system path to a relative URL
60        of the 'file' scheme; not recommended for general use."""
61        return quote(pathname)
62
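# Illustrative sketch (not part of the original module): on POSIX-style
# platforms the fallback helpers above are thin wrappers around quote()
# and unquote(), so converting a local path to a 'file' URL path and back
# is lossless.  The path below is a made-up example.
def _demo_pathname2url():
    path = '/tmp/hello world.txt'         # hypothetical local path
    url_path = pathname2url(path)         # '/tmp/hello%20world.txt' on POSIX
    return url2pathname(url_path)         # back to '/tmp/hello world.txt'
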
63# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65#     (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
70# Shortcut for basic usage
71_urlopener = None
72def urlopen(url, data=None, proxies=None):
73    """Create a file-like object for the specified URL to read from."""
74    from warnings import warnpy3k
75    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
76             "favor of urllib2.urlopen()", stacklevel=2)
77
78    global _urlopener
79    if proxies is not None:
80        opener = FancyURLopener(proxies=proxies)
81    elif not _urlopener:
82        opener = FancyURLopener()
83        _urlopener = opener
84    else:
85        opener = _urlopener
86    if data is None:
87        return opener.open(url)
88    else:
89        return opener.open(url, data)

def urlretrieve(url, filename=None, reporthook=None, data=None):
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)

def urlcleanup():
    if _urlopener:
        _urlopener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()
100
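# Illustrative sketch (not part of the original module): typical use of the
# three module-level convenience functions above.  The URL is a placeholder
# and network access is assumed to be available.
def _demo_urlopen_usage():
    f = urlopen('http://www.example.com/')            # hypothetical URL
    try:
        content_type = f.info().getheader('Content-Type')
        body = f.read()
    finally:
        f.close()
    # urlretrieve() downloads to a temporary file and returns its name
    # together with the response headers.
    filename, headers = urlretrieve('http://www.example.com/')
    urlcleanup()            # removes the temporary files created above
    return content_type, len(body), filename, headers
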
101# check for SSL
102try:
103    import ssl
except ImportError:
105    _have_ssl = False
106else:
107    _have_ssl = True
108
109# exception raised when downloaded size does not match content-length
110class ContentTooShortError(IOError):
111    def __init__(self, message, content):
112        IOError.__init__(self, message)
113        self.content = content
114
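# Illustrative sketch (not part of the original module): urlretrieve()
# raises ContentTooShortError when the connection is closed before
# Content-Length bytes have been received; the partial (filename, headers)
# result is kept on the exception.  The URL is a placeholder.
def _demo_content_too_short():
    try:
        return urlretrieve('http://www.example.com/big-file')   # hypothetical
    except ContentTooShortError, e:
        return e.content         # the (filename, headers) tuple built so far
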
115ftpcache = {}
116class URLopener:
117    """Class to open URLs.
118    This is a class rather than just a subroutine because we may need
119    more than one set of global protocol-specific options.
120    Note -- this is a base class for those who don't want the
121    automatic handling of errors type 302 (relocated) and 401
122    (authorization needed)."""
123
124    __tempfiles = None
125
126    version = "Python-urllib/%s" % __version__
127
128    # Constructor
129    def __init__(self, proxies=None, **x509):
130        if proxies is None:
131            proxies = getproxies()
132        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
133        self.proxies = proxies
134        self.key_file = x509.get('key_file')
135        self.cert_file = x509.get('cert_file')
136        self.addheaders = [('User-Agent', self.version)]
137        self.__tempfiles = []
138        self.__unlink = os.unlink # See cleanup()
139        self.tempcache = None
140        # Undocumented feature: if you assign {} to tempcache,
141        # it is used to cache files retrieved with
142        # self.retrieve().  This is not enabled by default
143        # since it does not work for changing documents (and I
144        # haven't got the logic to check expiration headers
145        # yet).
146        self.ftpcache = ftpcache
147        # Undocumented feature: you can use a different
148        # ftp cache by assigning to the .ftpcache member;
149        # in case you want logically independent URL openers
150        # XXX This is not threadsafe.  Bah.
151
152    def __del__(self):
153        self.close()
154
155    def close(self):
156        self.cleanup()
157
158    def cleanup(self):
159        # This code sometimes runs when the rest of this module
160        # has already been deleted, so it can't use any globals
161        # or import anything.
162        if self.__tempfiles:
163            for file in self.__tempfiles:
164                try:
165                    self.__unlink(file)
166                except OSError:
167                    pass
168            del self.__tempfiles[:]
169        if self.tempcache:
170            self.tempcache.clear()
171
172    def addheader(self, *args):
173        """Add a header to be used by the HTTP interface only
174        e.g. u.addheader('Accept', 'sound/basic')"""
175        self.addheaders.append(args)
176
177    # External interface
178    def open(self, fullurl, data=None):
179        """Use URLopener().open(file) instead of open(file, 'r')."""
180        fullurl = unwrap(toBytes(fullurl))
        # percent-encode the URL to work around broken servers, e.g. when
        # the path contains unencoded spaces.
183        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
184        if self.tempcache and fullurl in self.tempcache:
185            filename, headers = self.tempcache[fullurl]
186            fp = open(filename, 'rb')
187            return addinfourl(fp, headers, fullurl)
188        urltype, url = splittype(fullurl)
189        if not urltype:
190            urltype = 'file'
191        if urltype in self.proxies:
192            proxy = self.proxies[urltype]
193            urltype, proxyhost = splittype(proxy)
194            host, selector = splithost(proxyhost)
195            url = (host, fullurl) # Signal special case to open_*()
196        else:
197            proxy = None
198        name = 'open_' + urltype
199        self.type = urltype
200        name = name.replace('-', '_')
201        if not hasattr(self, name):
202            if proxy:
203                return self.open_unknown_proxy(proxy, fullurl, data)
204            else:
205                return self.open_unknown(fullurl, data)
206        try:
207            if data is None:
208                return getattr(self, name)(url)
209            else:
210                return getattr(self, name)(url, data)
211        except socket.error, msg:
212            raise IOError, ('socket error', msg), sys.exc_info()[2]
213
214    def open_unknown(self, fullurl, data=None):
215        """Overridable interface to open unknown URL type."""
216        type, url = splittype(fullurl)
217        raise IOError, ('url error', 'unknown url type', type)
218
219    def open_unknown_proxy(self, proxy, fullurl, data=None):
220        """Overridable interface to open unknown URL type."""
221        type, url = splittype(fullurl)
222        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
223
224    # External interface
225    def retrieve(self, url, filename=None, reporthook=None, data=None):
226        """retrieve(url) returns (filename, headers) for a local object
227        or (tempfilename, headers) for a remote object."""
228        url = unwrap(toBytes(url))
229        if self.tempcache and url in self.tempcache:
230            return self.tempcache[url]
231        type, url1 = splittype(url)
232        if filename is None and (not type or type == 'file'):
233            try:
234                fp = self.open_local_file(url1)
235                hdrs = fp.info()
236                fp.close()
237                return url2pathname(splithost(url1)[1]), hdrs
238            except IOError:
239                pass
240        fp = self.open(url, data)
241        try:
242            headers = fp.info()
243            if filename:
244                tfp = open(filename, 'wb')
245            else:
246                import tempfile
247                garbage, path = splittype(url)
248                garbage, path = splithost(path or "")
249                path, garbage = splitquery(path or "")
250                path, garbage = splitattr(path or "")
251                suffix = os.path.splitext(path)[1]
252                (fd, filename) = tempfile.mkstemp(suffix)
253                self.__tempfiles.append(filename)
254                tfp = os.fdopen(fd, 'wb')
255            try:
256                result = filename, headers
257                if self.tempcache is not None:
258                    self.tempcache[url] = result
259                bs = 1024*8
260                size = -1
261                read = 0
262                blocknum = 0
263                if "content-length" in headers:
264                    size = int(headers["Content-Length"])
265                if reporthook:
266                    reporthook(blocknum, bs, size)
267                while 1:
268                    block = fp.read(bs)
269                    if block == "":
270                        break
271                    read += len(block)
272                    tfp.write(block)
273                    blocknum += 1
274                    if reporthook:
275                        reporthook(blocknum, bs, size)
276            finally:
277                tfp.close()
278        finally:
279            fp.close()
280
281        # raise exception if actual size does not match content-length header
282        if size >= 0 and read < size:
283            raise ContentTooShortError("retrieval incomplete: got only %i out "
284                                       "of %i bytes" % (read, size), result)
285
286        return result
287
288    # Each method named open_<type> knows how to open that type of URL
289
290    def open_http(self, url, data=None):
291        """Use HTTP protocol."""
292        import httplib
293        user_passwd = None
294        proxy_passwd= None
295        if isinstance(url, str):
296            host, selector = splithost(url)
297            if host:
298                user_passwd, host = splituser(host)
299                host = unquote(host)
300            realhost = host
301        else:
302            host, selector = url
303            # check whether the proxy contains authorization information
304            proxy_passwd, host = splituser(host)
305            # now we proceed with the url we want to obtain
306            urltype, rest = splittype(selector)
307            url = rest
308            user_passwd = None
309            if urltype.lower() != 'http':
310                realhost = None
311            else:
312                realhost, rest = splithost(rest)
313                if realhost:
314                    user_passwd, realhost = splituser(realhost)
315                if user_passwd:
316                    selector = "%s://%s%s" % (urltype, realhost, rest)
317                if proxy_bypass(realhost):
318                    host = realhost
319
320            #print "proxy via http:", host, selector
321        if not host: raise IOError, ('http error', 'no host given')
322
323        if proxy_passwd:
324            proxy_passwd = unquote(proxy_passwd)
325            proxy_auth = base64.b64encode(proxy_passwd).strip()
326        else:
327            proxy_auth = None
328
329        if user_passwd:
330            user_passwd = unquote(user_passwd)
331            auth = base64.b64encode(user_passwd).strip()
332        else:
333            auth = None
334        h = httplib.HTTP(host)
335        if data is not None:
336            h.putrequest('POST', selector)
337            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
338            h.putheader('Content-Length', '%d' % len(data))
339        else:
340            h.putrequest('GET', selector)
341        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
342        if auth: h.putheader('Authorization', 'Basic %s' % auth)
343        if realhost: h.putheader('Host', realhost)
344        for args in self.addheaders: h.putheader(*args)
345        h.endheaders(data)
346        errcode, errmsg, headers = h.getreply()
347        fp = h.getfile()
348        if errcode == -1:
349            if fp: fp.close()
350            # something went wrong with the HTTP status line
351            raise IOError, ('http protocol error', 0,
352                            'got a bad status line', None)
353        # According to RFC 2616, "2xx" code indicates that the client's
354        # request was successfully received, understood, and accepted.
355        if (200 <= errcode < 300):
356            return addinfourl(fp, headers, "http:" + url, errcode)
357        else:
358            if data is None:
359                return self.http_error(url, fp, errcode, errmsg, headers)
360            else:
361                return self.http_error(url, fp, errcode, errmsg, headers, data)
362
363    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
364        """Handle http errors.
        Derived classes can override this, or provide specific handlers
366        named http_error_DDD where DDD is the 3-digit error code."""
367        # First check if there's a specific handler for this error
368        name = 'http_error_%d' % errcode
369        if hasattr(self, name):
370            method = getattr(self, name)
371            if data is None:
372                result = method(url, fp, errcode, errmsg, headers)
373            else:
374                result = method(url, fp, errcode, errmsg, headers, data)
375            if result: return result
376        return self.http_error_default(url, fp, errcode, errmsg, headers)
377
378    def http_error_default(self, url, fp, errcode, errmsg, headers):
379        """Default error handler: close the connection and raise IOError."""
380        fp.close()
381        raise IOError, ('http error', errcode, errmsg, headers)
382
383    if _have_ssl:
384        def open_https(self, url, data=None):
385            """Use HTTPS protocol."""
386
387            import httplib
388            user_passwd = None
389            proxy_passwd = None
390            if isinstance(url, str):
391                host, selector = splithost(url)
392                if host:
393                    user_passwd, host = splituser(host)
394                    host = unquote(host)
395                realhost = host
396            else:
397                host, selector = url
                # check whether the proxy contains authorization information
399                proxy_passwd, host = splituser(host)
400                urltype, rest = splittype(selector)
401                url = rest
402                user_passwd = None
403                if urltype.lower() != 'https':
404                    realhost = None
405                else:
406                    realhost, rest = splithost(rest)
407                    if realhost:
408                        user_passwd, realhost = splituser(realhost)
409                    if user_passwd:
410                        selector = "%s://%s%s" % (urltype, realhost, rest)
411                #print "proxy via https:", host, selector
412            if not host: raise IOError, ('https error', 'no host given')
413            if proxy_passwd:
414                proxy_passwd = unquote(proxy_passwd)
415                proxy_auth = base64.b64encode(proxy_passwd).strip()
416            else:
417                proxy_auth = None
418            if user_passwd:
419                user_passwd = unquote(user_passwd)
420                auth = base64.b64encode(user_passwd).strip()
421            else:
422                auth = None
423            h = httplib.HTTPS(host, 0,
424                              key_file=self.key_file,
425                              cert_file=self.cert_file)
426            if data is not None:
427                h.putrequest('POST', selector)
428                h.putheader('Content-Type',
429                            'application/x-www-form-urlencoded')
430                h.putheader('Content-Length', '%d' % len(data))
431            else:
432                h.putrequest('GET', selector)
433            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
434            if auth: h.putheader('Authorization', 'Basic %s' % auth)
435            if realhost: h.putheader('Host', realhost)
436            for args in self.addheaders: h.putheader(*args)
437            h.endheaders(data)
438            errcode, errmsg, headers = h.getreply()
439            fp = h.getfile()
440            if errcode == -1:
441                if fp: fp.close()
442                # something went wrong with the HTTP status line
443                raise IOError, ('http protocol error', 0,
444                                'got a bad status line', None)
445            # According to RFC 2616, "2xx" code indicates that the client's
446            # request was successfully received, understood, and accepted.
447            if (200 <= errcode < 300):
448                return addinfourl(fp, headers, "https:" + url, errcode)
449            else:
450                if data is None:
451                    return self.http_error(url, fp, errcode, errmsg, headers)
452                else:
453                    return self.http_error(url, fp, errcode, errmsg, headers,
454                                           data)
455
456    def open_file(self, url):
457        """Use local file or FTP depending on form of URL."""
458        if not isinstance(url, str):
459            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
460        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
461            return self.open_ftp(url)
462        else:
463            return self.open_local_file(url)
464
465    def open_local_file(self, url):
466        """Use local file."""
467        import mimetypes, mimetools, email.utils
468        try:
469            from cStringIO import StringIO
470        except ImportError:
471            from StringIO import StringIO
472        host, file = splithost(url)
473        localname = url2pathname(file)
474        try:
475            stats = os.stat(localname)
476        except OSError, e:
477            raise IOError(e.errno, e.strerror, e.filename)
478        size = stats.st_size
479        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
480        mtype = mimetypes.guess_type(url)[0]
481        headers = mimetools.Message(StringIO(
482            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
483            (mtype or 'text/plain', size, modified)))
484        if not host:
485            urlfile = file
486            if file[:1] == '/':
487                urlfile = 'file://' + file
488            elif file[:2] == './':
489                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
490            return addinfourl(open(localname, 'rb'),
491                              headers, urlfile)
492        host, port = splitport(host)
493        if not port \
494           and socket.gethostbyname(host) in (localhost(), thishost()):
495            urlfile = file
496            if file[:1] == '/':
497                urlfile = 'file://' + file
498            return addinfourl(open(localname, 'rb'),
499                              headers, urlfile)
500        raise IOError, ('local file error', 'not on local host')
501
502    def open_ftp(self, url):
503        """Use FTP protocol."""
504        if not isinstance(url, str):
505            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
506        import mimetypes, mimetools
507        try:
508            from cStringIO import StringIO
509        except ImportError:
510            from StringIO import StringIO
511        host, path = splithost(url)
512        if not host: raise IOError, ('ftp error', 'no host given')
513        host, port = splitport(host)
514        user, host = splituser(host)
515        if user: user, passwd = splitpasswd(user)
516        else: passwd = None
517        host = unquote(host)
518        user = user or ''
519        passwd = passwd or ''
520        host = socket.gethostbyname(host)
521        if not port:
522            import ftplib
523            port = ftplib.FTP_PORT
524        else:
525            port = int(port)
526        path, attrs = splitattr(path)
527        path = unquote(path)
528        dirs = path.split('/')
529        dirs, file = dirs[:-1], dirs[-1]
530        if dirs and not dirs[0]: dirs = dirs[1:]
531        if dirs and not dirs[0]: dirs[0] = '/'
532        key = user, host, port, '/'.join(dirs)
533        # XXX thread unsafe!
534        if len(self.ftpcache) > MAXFTPCACHE:
535            # Prune the cache, rather arbitrarily
536            for k in self.ftpcache.keys():
537                if k != key:
538                    v = self.ftpcache[k]
539                    del self.ftpcache[k]
540                    v.close()
541        try:
            if key not in self.ftpcache:
543                self.ftpcache[key] = \
544                    ftpwrapper(user, passwd, host, port, dirs)
545            if not file: type = 'D'
546            else: type = 'I'
547            for attr in attrs:
548                attr, value = splitvalue(attr)
549                if attr.lower() == 'type' and \
550                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
551                    type = value.upper()
552            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
553            mtype = mimetypes.guess_type("ftp:" + url)[0]
554            headers = ""
555            if mtype:
556                headers += "Content-Type: %s\n" % mtype
557            if retrlen is not None and retrlen >= 0:
558                headers += "Content-Length: %d\n" % retrlen
559            headers = mimetools.Message(StringIO(headers))
560            return addinfourl(fp, headers, "ftp:" + url)
561        except ftperrors(), msg:
562            raise IOError, ('ftp error', msg), sys.exc_info()[2]
563
564    def open_data(self, url, data=None):
565        """Use "data" URL."""
566        if not isinstance(url, str):
567            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
568        # ignore POSTed data
569        #
570        # syntax of data URLs:
571        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
572        # mediatype := [ type "/" subtype ] *( ";" parameter )
573        # data      := *urlchar
574        # parameter := attribute "=" value
575        import mimetools
576        try:
577            from cStringIO import StringIO
578        except ImportError:
579            from StringIO import StringIO
580        try:
581            [type, data] = url.split(',', 1)
582        except ValueError:
583            raise IOError, ('data error', 'bad data URL')
584        if not type:
585            type = 'text/plain;charset=US-ASCII'
586        semi = type.rfind(';')
587        if semi >= 0 and '=' not in type[semi:]:
588            encoding = type[semi+1:]
589            type = type[:semi]
590        else:
591            encoding = ''
592        msg = []
593        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
594                                            time.gmtime(time.time())))
595        msg.append('Content-type: %s' % type)
596        if encoding == 'base64':
597            data = base64.decodestring(data)
598        else:
599            data = unquote(data)
600        msg.append('Content-Length: %d' % len(data))
601        msg.append('')
602        msg.append(data)
603        msg = '\n'.join(msg)
604        f = StringIO(msg)
605        headers = mimetools.Message(f, 0)
606        #f.fileno = None     # needed for addinfourl
607        return addinfourl(f, headers, url)
608
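
# Illustrative sketch (not part of the original module): using URLopener
# directly with a custom User-Agent and an extra request header.  The URL
# is a placeholder; most callers use FancyURLopener (below) or the
# module-level urlopen() shortcut instead.
def _demo_urlopener():
    opener = URLopener()
    # addheaders is sent with every HTTP(S) request made by this opener;
    # replacing it overrides the default User-Agent set in __init__().
    opener.addheaders = [('User-Agent', 'Python-urllib-demo/0.1')]
    opener.addheader('Accept', 'text/html')
    f = opener.open('http://www.example.com/')        # hypothetical URL
    try:
        return f.info(), f.read()
    finally:
        f.close()
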
609
610class FancyURLopener(URLopener):
611    """Derived class with handlers for errors we can handle (perhaps)."""
612
613    def __init__(self, *args, **kwargs):
614        URLopener.__init__(self, *args, **kwargs)
615        self.auth_cache = {}
616        self.tries = 0
617        self.maxtries = 10
618
619    def http_error_default(self, url, fp, errcode, errmsg, headers):
620        """Default error handling -- don't raise an exception."""
621        return addinfourl(fp, headers, "http:" + url, errcode)
622
623    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
624        """Error 302 -- relocated (temporarily)."""
625        self.tries += 1
626        if self.maxtries and self.tries >= self.maxtries:
627            if hasattr(self, "http_error_500"):
628                meth = self.http_error_500
629            else:
630                meth = self.http_error_default
631            self.tries = 0
632            return meth(url, fp, 500,
633                        "Internal Server Error: Redirect Recursion", headers)
634        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
635                                        data)
636        self.tries = 0
637        return result
638
639    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
640        if 'location' in headers:
641            newurl = headers['location']
642        elif 'uri' in headers:
643            newurl = headers['uri']
644        else:
645            return
646        fp.close()
647        # In case the server sent a relative URL, join with original:
648        newurl = basejoin(self.type + ":" + url, newurl)
649
650        # For security reasons we do not allow redirects to protocols
651        # other than HTTP, HTTPS or FTP.
652        newurl_lower = newurl.lower()
653        if not (newurl_lower.startswith('http://') or
654                newurl_lower.startswith('https://') or
655                newurl_lower.startswith('ftp://')):
656            raise IOError('redirect error', errcode,
657                          errmsg + " - Redirection to url '%s' is not allowed" %
658                          newurl,
659                          headers)
660
661        return self.open(newurl)
662
663    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
664        """Error 301 -- also relocated (permanently)."""
665        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
666
667    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
668        """Error 303 -- also relocated (essentially identical to 302)."""
669        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
670
671    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
672        """Error 307 -- relocated, but turn POST into error."""
673        if data is None:
674            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
675        else:
676            return self.http_error_default(url, fp, errcode, errmsg, headers)
677
678    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
679        """Error 401 -- authentication required.
680        This function supports Basic authentication only."""
        if 'www-authenticate' not in headers:
682            URLopener.http_error_default(self, url, fp,
683                                         errcode, errmsg, headers)
684        stuff = headers['www-authenticate']
685        import re
686        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
687        if not match:
688            URLopener.http_error_default(self, url, fp,
689                                         errcode, errmsg, headers)
690        scheme, realm = match.groups()
691        if scheme.lower() != 'basic':
692            URLopener.http_error_default(self, url, fp,
693                                         errcode, errmsg, headers)
694        name = 'retry_' + self.type + '_basic_auth'
695        if data is None:
696            return getattr(self,name)(url, realm)
697        else:
698            return getattr(self,name)(url, realm, data)
699
700    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
701        """Error 407 -- proxy authentication required.
702        This function supports Basic authentication only."""
        if 'proxy-authenticate' not in headers:
704            URLopener.http_error_default(self, url, fp,
705                                         errcode, errmsg, headers)
706        stuff = headers['proxy-authenticate']
707        import re
708        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
709        if not match:
710            URLopener.http_error_default(self, url, fp,
711                                         errcode, errmsg, headers)
712        scheme, realm = match.groups()
713        if scheme.lower() != 'basic':
714            URLopener.http_error_default(self, url, fp,
715                                         errcode, errmsg, headers)
716        name = 'retry_proxy_' + self.type + '_basic_auth'
717        if data is None:
718            return getattr(self,name)(url, realm)
719        else:
720            return getattr(self,name)(url, realm, data)
721
722    def retry_proxy_http_basic_auth(self, url, realm, data=None):
723        host, selector = splithost(url)
724        newurl = 'http://' + host + selector
725        proxy = self.proxies['http']
726        urltype, proxyhost = splittype(proxy)
727        proxyhost, proxyselector = splithost(proxyhost)
728        i = proxyhost.find('@') + 1
729        proxyhost = proxyhost[i:]
730        user, passwd = self.get_user_passwd(proxyhost, realm, i)
731        if not (user or passwd): return None
732        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
733        self.proxies['http'] = 'http://' + proxyhost + proxyselector
734        if data is None:
735            return self.open(newurl)
736        else:
737            return self.open(newurl, data)
738
739    def retry_proxy_https_basic_auth(self, url, realm, data=None):
740        host, selector = splithost(url)
741        newurl = 'https://' + host + selector
742        proxy = self.proxies['https']
743        urltype, proxyhost = splittype(proxy)
744        proxyhost, proxyselector = splithost(proxyhost)
745        i = proxyhost.find('@') + 1
746        proxyhost = proxyhost[i:]
747        user, passwd = self.get_user_passwd(proxyhost, realm, i)
748        if not (user or passwd): return None
749        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
750        self.proxies['https'] = 'https://' + proxyhost + proxyselector
751        if data is None:
752            return self.open(newurl)
753        else:
754            return self.open(newurl, data)
755
756    def retry_http_basic_auth(self, url, realm, data=None):
757        host, selector = splithost(url)
758        i = host.find('@') + 1
759        host = host[i:]
760        user, passwd = self.get_user_passwd(host, realm, i)
761        if not (user or passwd): return None
762        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
763        newurl = 'http://' + host + selector
764        if data is None:
765            return self.open(newurl)
766        else:
767            return self.open(newurl, data)
768
769    def retry_https_basic_auth(self, url, realm, data=None):
770        host, selector = splithost(url)
771        i = host.find('@') + 1
772        host = host[i:]
773        user, passwd = self.get_user_passwd(host, realm, i)
774        if not (user or passwd): return None
775        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
776        newurl = 'https://' + host + selector
777        if data is None:
778            return self.open(newurl)
779        else:
780            return self.open(newurl, data)
781
782    def get_user_passwd(self, host, realm, clear_cache=0):
783        key = realm + '@' + host.lower()
784        if key in self.auth_cache:
785            if clear_cache:
786                del self.auth_cache[key]
787            else:
788                return self.auth_cache[key]
789        user, passwd = self.prompt_user_passwd(host, realm)
790        if user or passwd: self.auth_cache[key] = (user, passwd)
791        return user, passwd
792
793    def prompt_user_passwd(self, host, realm):
794        """Override this in a GUI environment!"""
795        import getpass
796        try:
797            user = raw_input("Enter username for %s at %s: " % (realm,
798                                                                host))
799            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
800                (user, realm, host))
801            return user, passwd
802        except KeyboardInterrupt:
803            print
804            return None, None
805
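
# Illustrative sketch (not part of the original module): a FancyURLopener
# subclass that answers Basic-auth challenges from stored credentials
# instead of prompting on the terminal.  The attribute names and the
# credentials passed in are placeholders.
class _DemoAuthOpener(FancyURLopener):
    def __init__(self, username, password, *args, **kwargs):
        FancyURLopener.__init__(self, *args, **kwargs)
        self._username = username
        self._password = password

    def prompt_user_passwd(self, host, realm):
        # Called (via get_user_passwd) from the 401/407 retry handlers.
        return self._username, self._password
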
806
807# Utility functions
808
809_localhost = None
810def localhost():
811    """Return the IP address of the magic hostname 'localhost'."""
812    global _localhost
813    if _localhost is None:
814        _localhost = socket.gethostbyname('localhost')
815    return _localhost
816
817_thishost = None
818def thishost():
819    """Return the IP address of the current host."""
820    global _thishost
821    if _thishost is None:
822        _thishost = socket.gethostbyname(socket.gethostname())
823    return _thishost
824
825_ftperrors = None
826def ftperrors():
827    """Return the set of errors raised by the FTP class."""
828    global _ftperrors
829    if _ftperrors is None:
830        import ftplib
831        _ftperrors = ftplib.all_errors
832    return _ftperrors
833
834_noheaders = None
835def noheaders():
836    """Return an empty mimetools.Message object."""
837    global _noheaders
838    if _noheaders is None:
839        import mimetools
840        try:
841            from cStringIO import StringIO
842        except ImportError:
843            from StringIO import StringIO
844        _noheaders = mimetools.Message(StringIO(), 0)
845        _noheaders.fp.close()   # Recycle file descriptor
846    return _noheaders
847
848
849# Utility classes
850
851class ftpwrapper:
852    """Class used by open_ftp() for cache of open FTP connections."""
853
854    def __init__(self, user, passwd, host, port, dirs,
855                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
856                 persistent=True):
857        self.user = user
858        self.passwd = passwd
859        self.host = host
860        self.port = port
861        self.dirs = dirs
862        self.timeout = timeout
863        self.refcount = 0
864        self.keepalive = persistent
865        self.init()
866
867    def init(self):
868        import ftplib
869        self.busy = 0
870        self.ftp = ftplib.FTP()
871        self.ftp.connect(self.host, self.port, self.timeout)
872        self.ftp.login(self.user, self.passwd)
873        for dir in self.dirs:
874            self.ftp.cwd(dir)
875
876    def retrfile(self, file, type):
877        import ftplib
878        self.endtransfer()
879        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
880        else: cmd = 'TYPE ' + type; isdir = 0
881        try:
882            self.ftp.voidcmd(cmd)
883        except ftplib.all_errors:
884            self.init()
885            self.ftp.voidcmd(cmd)
886        conn = None
887        if file and not isdir:
888            # Try to retrieve as a file
889            try:
890                cmd = 'RETR ' + file
891                conn, retrlen = self.ftp.ntransfercmd(cmd)
892            except ftplib.error_perm, reason:
893                if str(reason)[:3] != '550':
894                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
895        if not conn:
896            # Set transfer mode to ASCII!
897            self.ftp.voidcmd('TYPE A')
898            # Try a directory listing. Verify that directory exists.
899            if file:
900                pwd = self.ftp.pwd()
901                try:
902                    try:
903                        self.ftp.cwd(file)
904                    except ftplib.error_perm, reason:
905                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
906                finally:
907                    self.ftp.cwd(pwd)
908                cmd = 'LIST ' + file
909            else:
910                cmd = 'LIST'
911            conn, retrlen = self.ftp.ntransfercmd(cmd)
912        self.busy = 1
913        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
914        self.refcount += 1
915        conn.close()
916        # Pass back both a suitably decorated object and a retrieval length
917        return (ftpobj, retrlen)
918
919    def endtransfer(self):
920        if not self.busy:
921            return
922        self.busy = 0
923        try:
924            self.ftp.voidresp()
925        except ftperrors():
926            pass
927
928    def close(self):
929        self.keepalive = False
930        if self.refcount <= 0:
931            self.real_close()
932
933    def file_close(self):
934        self.endtransfer()
935        self.refcount -= 1
936        if self.refcount <= 0 and not self.keepalive:
937            self.real_close()
938
939    def real_close(self):
940        self.endtransfer()
941        try:
942            self.ftp.close()
943        except ftperrors():
944            pass
945
946class addbase:
947    """Base class for addinfo and addclosehook."""
948
949    def __init__(self, fp):
950        self.fp = fp
951        self.read = self.fp.read
952        self.readline = self.fp.readline
953        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
954        if hasattr(self.fp, "fileno"):
955            self.fileno = self.fp.fileno
956        else:
957            self.fileno = lambda: None
958        if hasattr(self.fp, "__iter__"):
959            self.__iter__ = self.fp.__iter__
960            if hasattr(self.fp, "next"):
961                self.next = self.fp.next
962
963    def __repr__(self):
964        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
965                                             id(self), self.fp)
966
967    def close(self):
968        self.read = None
969        self.readline = None
970        self.readlines = None
971        self.fileno = None
972        if self.fp: self.fp.close()
973        self.fp = None
974
975class addclosehook(addbase):
976    """Class to add a close hook to an open file."""
977
978    def __init__(self, fp, closehook, *hookargs):
979        addbase.__init__(self, fp)
980        self.closehook = closehook
981        self.hookargs = hookargs
982
983    def close(self):
984        if self.closehook:
985            self.closehook(*self.hookargs)
986            self.closehook = None
987            self.hookargs = None
988        addbase.close(self)
989
990class addinfo(addbase):
991    """class to add an info() method to an open file."""
992
993    def __init__(self, fp, headers):
994        addbase.__init__(self, fp)
995        self.headers = headers
996
997    def info(self):
998        return self.headers
999
1000class addinfourl(addbase):
1001    """class to add info() and geturl() methods to an open file."""
1002
1003    def __init__(self, fp, headers, url, code=None):
1004        addbase.__init__(self, fp)
1005        self.headers = headers
1006        self.url = url
1007        self.code = code
1008
1009    def info(self):
1010        return self.headers
1011
1012    def getcode(self):
1013        return self.code
1014
1015    def geturl(self):
1016        return self.url
1017
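
# Illustrative sketch (not part of the original module): addinfourl is the
# wrapper the open_*() handlers return -- any file-like object plus headers,
# the original URL and an optional status code.
def _demo_addinfourl():
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    f = addinfourl(StringIO('hello'), noheaders(),
                   'http://www.example.com/', 200)   # hypothetical URL
    return f.read(), f.geturl(), f.getcode(), f.info()
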
1018
1019# Utilities to parse URLs (most of these return None for missing parts):
1020# unwrap('<URL:type://host/path>') --> 'type://host/path'
1021# splittype('type:opaquestring') --> 'type', 'opaquestring'
1022# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1023# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1024# splitpasswd('user:passwd') -> 'user', 'passwd'
1025# splitport('host:port') --> 'host', 'port'
1026# splitquery('/path?query') --> '/path', 'query'
1027# splittag('/path#tag') --> '/path', 'tag'
1028# splitattr('/path;attr1=value1;attr2=value2;...') ->
1029#   '/path', ['attr1=value1', 'attr2=value2', ...]
1030# splitvalue('attr=value') --> 'attr', 'value'
1031# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1033
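# Illustrative sketch (not part of the original module): how the split*()
# helpers listed above decompose a URL step by step.  The URL is a made-up
# example; each comment shows the expected result.
def _demo_split_helpers():
    url = 'http://user:secret@www.example.com:8080/path;type=a?q=1#frag'
    scheme, rest = splittype(url)         # 'http', '//user:secret@...#frag'
    host, path = splithost(rest)          # 'user:secret@www.example.com:8080', '/path;type=a?q=1#frag'
    userinfo, hostport = splituser(host)  # 'user:secret', 'www.example.com:8080'
    user, passwd = splitpasswd(userinfo)  # 'user', 'secret'
    hostname, port = splitport(hostport)  # 'www.example.com', '8080'
    path, tag = splittag(path)            # '/path;type=a?q=1', 'frag'
    path, query = splitquery(path)        # '/path;type=a', 'q=1'
    path, attrs = splitattr(path)         # '/path', ['type=a']
    return hostname, port, user, passwd, path, attrs, query, tag
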
1034try:
1035    unicode
1036except NameError:
1037    def _is_unicode(x):
1038        return 0
1039else:
1040    def _is_unicode(x):
1041        return isinstance(x, unicode)
1042
1043def toBytes(url):
1044    """toBytes(u"URL") --> 'URL'."""
1045    # Most URL schemes require ASCII. If that changes, the conversion
1046    # can be relaxed
1047    if _is_unicode(url):
1048        try:
1049            url = url.encode("ASCII")
1050        except UnicodeError:
1051            raise UnicodeError("URL " + repr(url) +
1052                               " contains non-ASCII characters")
1053    return url
1054
1055def unwrap(url):
1056    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1057    url = url.strip()
1058    if url[:1] == '<' and url[-1:] == '>':
1059        url = url[1:-1].strip()
1060    if url[:4] == 'URL:': url = url[4:].strip()
1061    return url
1062
1063_typeprog = None
1064def splittype(url):
1065    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1066    global _typeprog
1067    if _typeprog is None:
1068        import re
1069        _typeprog = re.compile('^([^/:]+):')
1070
1071    match = _typeprog.match(url)
1072    if match:
1073        scheme = match.group(1)
1074        return scheme.lower(), url[len(scheme) + 1:]
1075    return None, url
1076
1077_hostprog = None
1078def splithost(url):
1079    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1080    global _hostprog
1081    if _hostprog is None:
1082        import re
1083        _hostprog = re.compile('^//([^/?]*)(.*)$')
1084
1085    match = _hostprog.match(url)
1086    if match:
1087        host_port = match.group(1)
1088        path = match.group(2)
1089        if path and not path.startswith('/'):
1090            path = '/' + path
1091        return host_port, path
1092    return None, url
1093
1094_userprog = None
1095def splituser(host):
1096    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1097    global _userprog
1098    if _userprog is None:
1099        import re
1100        _userprog = re.compile('^(.*)@(.*)$')
1101
1102    match = _userprog.match(host)
1103    if match: return match.group(1, 2)
1104    return None, host
1105
1106_passwdprog = None
1107def splitpasswd(user):
1108    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1109    global _passwdprog
1110    if _passwdprog is None:
1111        import re
1112        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1113
1114    match = _passwdprog.match(user)
1115    if match: return match.group(1, 2)
1116    return user, None
1117
1118# splittag('/path#tag') --> '/path', 'tag'
1119_portprog = None
1120def splitport(host):
1121    """splitport('host:port') --> 'host', 'port'."""
1122    global _portprog
1123    if _portprog is None:
1124        import re
1125        _portprog = re.compile('^(.*):([0-9]+)$')
1126
1127    match = _portprog.match(host)
1128    if match: return match.group(1, 2)
1129    return host, None
1130
1131_nportprog = None
1132def splitnport(host, defport=-1):
1133    """Split host and port, returning numeric port.
1134    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
1136    Return None if ':' but not a valid number."""
1137    global _nportprog
1138    if _nportprog is None:
1139        import re
1140        _nportprog = re.compile('^(.*):(.*)$')
1141
1142    match = _nportprog.match(host)
1143    if match:
1144        host, port = match.group(1, 2)
1145        try:
1146            if not port: raise ValueError, "no digits"
1147            nport = int(port)
1148        except ValueError:
1149            nport = None
1150        return host, nport
1151    return host, defport
1152
1153_queryprog = None
1154def splitquery(url):
1155    """splitquery('/path?query') --> '/path', 'query'."""
1156    global _queryprog
1157    if _queryprog is None:
1158        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')
1160
1161    match = _queryprog.match(url)
1162    if match: return match.group(1, 2)
1163    return url, None
1164
1165_tagprog = None
1166def splittag(url):
1167    """splittag('/path#tag') --> '/path', 'tag'."""
1168    global _tagprog
1169    if _tagprog is None:
1170        import re
1171        _tagprog = re.compile('^(.*)#([^#]*)$')
1172
1173    match = _tagprog.match(url)
1174    if match: return match.group(1, 2)
1175    return url, None
1176
1177def splitattr(url):
1178    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1179        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1180    words = url.split(';')
1181    return words[0], words[1:]
1182
1183_valueprog = None
1184def splitvalue(attr):
1185    """splitvalue('attr=value') --> 'attr', 'value'."""
1186    global _valueprog
1187    if _valueprog is None:
1188        import re
1189        _valueprog = re.compile('^([^=]*)=(.*)$')
1190
1191    match = _valueprog.match(attr)
1192    if match: return match.group(1, 2)
1193    return attr, None
1194
# urlparse contains a duplicate of the code below to avoid a circular import.
# If you update this code, also update the copy in urlparse.  This code
# duplication does not exist in Python 3.
1198
1199_hexdig = '0123456789ABCDEFabcdef'
1200_hextochr = dict((a + b, chr(int(a + b, 16)))
1201                 for a in _hexdig for b in _hexdig)
1202_asciire = re.compile('([\x00-\x7f]+)')
1203
1204def unquote(s):
1205    """unquote('abc%20def') -> 'abc def'."""
1206    if _is_unicode(s):
1207        if '%' not in s:
1208            return s
1209        bits = _asciire.split(s)
1210        res = [bits[0]]
1211        append = res.append
1212        for i in range(1, len(bits), 2):
1213            append(unquote(str(bits[i])).decode('latin1'))
1214            append(bits[i + 1])
1215        return ''.join(res)
1216
1217    bits = s.split('%')
1218    # fastpath
1219    if len(bits) == 1:
1220        return s
1221    res = [bits[0]]
1222    append = res.append
1223    for item in bits[1:]:
1224        try:
1225            append(_hextochr[item[:2]])
1226            append(item[2:])
1227        except KeyError:
1228            append('%')
1229            append(item)
1230    return ''.join(res)
1231
1232def unquote_plus(s):
1233    """unquote('%7e/abc+def') -> '~/abc def'"""
1234    s = s.replace('+', ' ')
1235    return unquote(s)
1236
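# Illustrative sketch (not part of the original module): unquote() decodes
# %XX escapes only, while unquote_plus() additionally maps '+' to a space,
# as used in HTML form (query string) data.
def _demo_unquote():
    assert unquote('abc%20def') == 'abc def'
    assert unquote('abc+def') == 'abc+def'          # '+' is left alone
    assert unquote_plus('abc+def%21') == 'abc def!'
    return unquote_plus('%7e/abc+def')              # '~/abc def'
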
1237always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1238               'abcdefghijklmnopqrstuvwxyz'
1239               '0123456789' '_.-')
1240_safe_map = {}
1241for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1242    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1243_safe_quoters = {}
1244
1245def quote(s, safe='/'):
1246    """quote('abc def') -> 'abc%20def'
1247
1248    Each part of a URL, e.g. the path info, the query, etc., has a
1249    different set of reserved characters that must be quoted.
1250
1251    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1252    the following reserved characters.
1253
1254    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1255                  "$" | ","
1256
1257    Each of these characters is reserved in some component of a URL,
1258    but not necessarily in all of them.
1259
1260    By default, the quote function is intended for quoting the path
1261    section of a URL.  Thus, it will not encode '/'.  This character
1262    is reserved, but in typical usage the quote function is being
1263    called on a path where the existing slash characters are used as
1264    reserved characters.
1265    """
1266    # fastpath
1267    if not s:
1268        if s is None:
1269            raise TypeError('None object cannot be quoted')
1270        return s
1271    cachekey = (safe, always_safe)
1272    try:
1273        (quoter, safe) = _safe_quoters[cachekey]
1274    except KeyError:
1275        safe_map = _safe_map.copy()
1276        safe_map.update([(c, c) for c in safe])
1277        quoter = safe_map.__getitem__
1278        safe = always_safe + safe
1279        _safe_quoters[cachekey] = (quoter, safe)
1280    if not s.rstrip(safe):
1281        return s
1282    return ''.join(map(quoter, s))
1283
1284def quote_plus(s, safe=''):
1285    """Quote the query fragment of a URL; replacing ' ' with '+'"""
1286    if ' ' in s:
1287        s = quote(s, safe + ' ')
1288        return s.replace(' ', '+')
1289    return quote(s, safe)
1290
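# Illustrative sketch (not part of the original module): quote() keeps '/'
# unescaped by default because it is meant for the path part of a URL,
# while quote_plus() escapes it and turns spaces into '+' for query strings.
def _demo_quote():
    assert quote('abc def/ghi') == 'abc%20def/ghi'
    assert quote('abc def/ghi', safe='') == 'abc%20def%2Fghi'
    assert quote_plus('abc def/ghi') == 'abc+def%2Fghi'
    return quote('~/video.mp4', safe='/~')          # '~/video.mp4'
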
1291def urlencode(query, doseq=0):
1292    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1293
1294    If any values in the query arg are sequences and doseq is true, each
1295    sequence element is converted to a separate parameter.
1296
1297    If the query arg is a sequence of two-element tuples, the order of the
1298    parameters in the output will match the order of parameters in the
1299    input.
1300    """
1301
1302    if hasattr(query,"items"):
1303        # mapping objects
1304        query = query.items()
1305    else:
1306        # it's a bother at times that strings and string-like objects are
1307        # sequences...
1308        try:
1309            # non-sequence items should not work with len()
1310            # non-empty strings will fail this
1311            if len(query) and not isinstance(query[0], tuple):
1312                raise TypeError
1313            # zero-length sequences of all types will get here and succeed,
1314            # but that's a minor nit - since the original implementation
1315            # allowed empty dicts that type of behavior probably should be
1316            # preserved for consistency
1317        except TypeError:
1318            ty,va,tb = sys.exc_info()
1319            raise TypeError, "not a valid non-string sequence or mapping object", tb
1320
1321    l = []
1322    if not doseq:
1323        # preserve old behavior
1324        for k, v in query:
1325            k = quote_plus(str(k))
1326            v = quote_plus(str(v))
1327            l.append(k + '=' + v)
1328    else:
1329        for k, v in query:
1330            k = quote_plus(str(k))
1331            if isinstance(v, str):
1332                v = quote_plus(v)
1333                l.append(k + '=' + v)
1334            elif _is_unicode(v):
1335                # is there a reasonable way to convert to ASCII?
1336                # encode generates a string, but "replace" or "ignore"
1337                # lose information and "strict" can raise UnicodeError
1338                v = quote_plus(v.encode("ASCII","replace"))
1339                l.append(k + '=' + v)
1340            else:
1341                try:
1342                    # is this a sufficient test for sequence-ness?
1343                    len(v)
1344                except TypeError:
1345                    # not a sequence
1346                    v = quote_plus(str(v))
1347                    l.append(k + '=' + v)
1348                else:
1349                    # loop over the sequence
1350                    for elt in v:
1351                        l.append(k + '=' + quote_plus(str(elt)))
1352    return '&'.join(l)
1353
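# Illustrative sketch (not part of the original module): urlencode() builds
# an application/x-www-form-urlencoded string; with doseq=1 each element of
# a sequence value becomes its own key=value pair.
def _demo_urlencode():
    assert urlencode([('q', 'python urllib'), ('page', 2)]) == 'q=python+urllib&page=2'
    assert urlencode({'tag': ['a', 'b']}, doseq=1) == 'tag=a&tag=b'
    # Without doseq the list is stringified and quoted as a single value:
    assert urlencode({'tag': ['a', 'b']}) == 'tag=%5B%27a%27%2C+%27b%27%5D'
    return urlencode([('next', '/cart?id=7')])      # 'next=%2Fcart%3Fid%3D7'
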
1354# Proxy handling
1355def getproxies_environment():
1356    """Return a dictionary of scheme -> proxy server URL mappings.
1357
1358    Scan the environment for variables named <scheme>_proxy;
1359    this seems to be the standard convention.  If you need a
1360    different way, you can pass a proxies dictionary to the
1361    [Fancy]URLopener constructor.
1362
1363    """
1364    proxies = {}
1365    for name, value in os.environ.items():
1366        name = name.lower()
1367        if value and name[-6:] == '_proxy':
1368            proxies[name[:-6]] = value
1369    return proxies
1370
1371def proxy_bypass_environment(host):
1372    """Test if proxies should not be used for a particular host.
1373
1374    Checks the environment for a variable named no_proxy, which should
1375    be a list of DNS suffixes separated by commas, or '*' for all hosts.
1376    """
1377    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1378    # '*' is special case for always bypass
1379    if no_proxy == '*':
1380        return 1
1381    # strip port off host
1382    hostonly, port = splitport(host)
1383    # check if the host ends with any of the DNS suffixes
1384    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
1385    for name in no_proxy_list:
1386        if name and (hostonly.endswith(name) or host.endswith(name)):
1387            return 1
1388    # otherwise, don't bypass
1389    return 0
1390
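# Illustrative sketch (not part of the original module): how the <scheme>_proxy
# and no_proxy environment conventions interact with the helpers above.  The
# values are placeholders, set here only for the demonstration.
def _demo_proxy_environment():
    os.environ['http_proxy'] = 'http://proxy.example.com:3128'   # hypothetical
    os.environ['no_proxy'] = 'localhost,.internal.example.com'
    proxies = getproxies_environment()
    # proxies['http'] is now 'http://proxy.example.com:3128'; hosts whose
    # name ends with a no_proxy suffix are bypassed:
    bypass = proxy_bypass_environment('host.internal.example.com:8080')
    return proxies, bypass                           # bypass == 1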
1391
1392if sys.platform == 'darwin':
1393    from _scproxy import _get_proxy_settings, _get_proxies
1394
1395    def proxy_bypass_macosx_sysconf(host):
1396        """
1397        Return True iff this host shouldn't be accessed using a proxy
1398
1399        This function uses the MacOSX framework SystemConfiguration
1400        to fetch the proxy information.
1401        """
1402        import re
1403        import socket
1404        from fnmatch import fnmatch
1405
1406        hostonly, port = splitport(host)
1407
1408        def ip2num(ipAddr):
1409            parts = ipAddr.split('.')
1410            parts = map(int, parts)
1411            if len(parts) != 4:
1412                parts = (parts + [0, 0, 0, 0])[:4]
1413            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1414
1415        proxy_settings = _get_proxy_settings()
1416
1417        # Check for simple host names:
1418        if '.' not in host:
1419            if proxy_settings['exclude_simple']:
1420                return True
1421
1422        hostIP = None
1423
1424        for value in proxy_settings.get('exceptions', ()):
1425            # Items in the list are strings like these: *.local, 169.254/16
1426            if not value: continue
1427
1428            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1429            if m is not None:
1430                if hostIP is None:
1431                    try:
1432                        hostIP = socket.gethostbyname(hostonly)
1433                        hostIP = ip2num(hostIP)
1434                    except socket.error:
1435                        continue
1436
1437                base = ip2num(m.group(1))
1438                mask = m.group(2)
1439                if mask is None:
1440                    mask = 8 * (m.group(1).count('.') + 1)
1441
1442                else:
1443                    mask = int(mask[1:])
1444                mask = 32 - mask
1445
1446                if (hostIP >> mask) == (base >> mask):
1447                    return True
1448
1449            elif fnmatch(host, value):
1450                return True
1451
1452        return False
1453
1454    def getproxies_macosx_sysconf():
1455        """Return a dictionary of scheme -> proxy server URL mappings.
1456
1457        This function uses the MacOSX framework SystemConfiguration
1458        to fetch the proxy information.
1459        """
1460        return _get_proxies()
1461
1462    def proxy_bypass(host):
1463        if getproxies_environment():
1464            return proxy_bypass_environment(host)
1465        else:
1466            return proxy_bypass_macosx_sysconf(host)
1467
1468    def getproxies():
1469        return getproxies_environment() or getproxies_macosx_sysconf()
1470
1471elif os.name == 'nt':
1472    def getproxies_registry():
1473        """Return a dictionary of scheme -> proxy server URL mappings.
1474
1475        Win32 uses the registry to store proxies.
1476
1477        """
1478        proxies = {}
1479        try:
1480            import _winreg
1481        except ImportError:
1482            # Std module, so should be around - but you never know!
1483            return proxies
1484        try:
1485            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1486                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1487            proxyEnable = _winreg.QueryValueEx(internetSettings,
1488                                               'ProxyEnable')[0]
1489            if proxyEnable:
1490                # Returned as Unicode but problems if not converted to ASCII
1491                proxyServer = str(_winreg.QueryValueEx(internetSettings,
1492                                                       'ProxyServer')[0])
1493                if '=' in proxyServer:
1494                    # Per-protocol settings
1495                    for p in proxyServer.split(';'):
1496                        protocol, address = p.split('=', 1)
1497                        # See if address has a type:// prefix
1498                        import re
1499                        if not re.match('^([^/:]+)://', address):
1500                            address = '%s://%s' % (protocol, address)
1501                        proxies[protocol] = address
1502                else:
1503                    # Use one setting for all protocols
1504                    if proxyServer[:5] == 'http:':
1505                        proxies['http'] = proxyServer
1506                    else:
1507                        proxies['http'] = 'http://%s' % proxyServer
1508                        proxies['https'] = 'https://%s' % proxyServer
1509                        proxies['ftp'] = 'ftp://%s' % proxyServer
1510            internetSettings.Close()
1511        except (WindowsError, ValueError, TypeError):
1512            # Either registry key not found etc, or the value in an
1513            # unexpected format.
1514            # proxies already set up to be empty so nothing to do
1515            pass
1516        return proxies
1517
1518    def getproxies():
1519        """Return a dictionary of scheme -> proxy server URL mappings.
1520
1521        Returns settings gathered from the environment, if specified,
1522        or the registry.
1523
1524        """
1525        return getproxies_environment() or getproxies_registry()
1526
1527    def proxy_bypass_registry(host):
1528        try:
1529            import _winreg
1530            import re
1531        except ImportError:
1532            # Std modules, so should be around - but you never know!
1533            return 0
1534        try:
1535            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1536                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1537            proxyEnable = _winreg.QueryValueEx(internetSettings,
1538                                               'ProxyEnable')[0]
1539            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1540                                                     'ProxyOverride')[0])
1541            # ^^^^ Returned as Unicode but problems if not converted to ASCII
1542        except WindowsError:
1543            return 0
1544        if not proxyEnable or not proxyOverride:
1545            return 0
1546        # try to make a host list from name and IP address.
1547        rawHost, port = splitport(host)
1548        host = [rawHost]
1549        try:
1550            addr = socket.gethostbyname(rawHost)
1551            if addr != rawHost:
1552                host.append(addr)
1553        except socket.error:
1554            pass
1555        try:
1556            fqdn = socket.getfqdn(rawHost)
1557            if fqdn != rawHost:
1558                host.append(fqdn)
1559        except socket.error:
1560            pass
1561        # make a check value list from the registry entry: replace the
1562        # '<local>' string by the localhost entry and the corresponding
1563        # canonical entry.
1564        proxyOverride = proxyOverride.split(';')
1565        # now check if we match one of the registry values.
1566        for test in proxyOverride:
1567            if test == '<local>':
1568                if '.' not in rawHost:
1569                    return 1
1570            test = test.replace(".", r"\.")     # mask dots
1571            test = test.replace("*", r".*")     # change glob sequence
1572            test = test.replace("?", r".")      # change glob char
1573            for val in host:
1574                # print "%s <--> %s" %( test, val )
1575                if re.match(test, val, re.I):
1576                    return 1
1577        return 0
1578
1579    def proxy_bypass(host):
1580        """Return a dictionary of scheme -> proxy server URL mappings.
1581
1582        Returns settings gathered from the environment, if specified,
1583        or the registry.
1584
1585        """
1586        if getproxies_environment():
1587            return proxy_bypass_environment(host)
1588        else:
1589            return proxy_bypass_registry(host)
1590
1591else:
1592    # By default use environment variables
1593    getproxies = getproxies_environment
1594    proxy_bypass = proxy_bypass_environment
1595
1596# Test and time quote() and unquote()
1597def test1():
1598    s = ''
1599    for i in range(256): s = s + chr(i)
1600    s = s*4
1601    t0 = time.time()
1602    qs = quote(s)
1603    uqs = unquote(qs)
1604    t1 = time.time()
1605    if uqs != s:
1606        print 'Wrong!'
1607    print repr(s)
1608    print repr(qs)
1609    print repr(uqs)
1610    print round(t1 - t0, 3), 'sec'
1611
1612
1613def reporthook(blocknum, blocksize, totalsize):
1614    # Report during remote transfers
1615    print "Block number: %d, Block size: %d, Total size: %d" % (
1616        blocknum, blocksize, totalsize)
1617