urllib.py revision bcd833f30f77160e321056fa548d76e2abe26701
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that is has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31
32from urlparse import urljoin as basejoin
33
34__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
35           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
36           "urlencode", "url2pathname", "pathname2url", "splittag",
37           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
38           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
39           "splitnport", "splitquery", "splitattr", "splitvalue",
40           "getproxies"]
41
42__version__ = '1.17'    # XXX This version is not always updated :-(
43
44MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
45
46# Helper for non-unix systems
47if os.name == 'nt':
48    from nturl2path import url2pathname, pathname2url
49elif os.name == 'riscos':
50    from rourl2path import url2pathname, pathname2url
51else:
52    def url2pathname(pathname):
53        """OS-specific conversion from a relative URL of the 'file' scheme
54        to a file system path; not recommended for general use."""
55        return unquote(pathname)
56
57    def pathname2url(pathname):
58        """OS-specific conversion from a file system path to a relative URL
59        of the 'file' scheme; not recommended for general use."""
60        return quote(pathname)
61
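# A minimal sketch of the fallback conversions above (POSIX-style paths;
# the file name is made up):
#
#     >>> pathname2url('/tmp/some file.txt')
#     '/tmp/some%20file.txt'
#     >>> url2pathname('/tmp/some%20file.txt')
#     '/tmp/some file.txt'
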
62# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64#     (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
70_urlopener = None
71def urlopen(url, data=None, proxies=None):
72    """Create a file-like object for the specified URL to read from."""
73    from warnings import warnpy3k
74    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
75             "favor of urllib2.urlopen()", stacklevel=2)
76
77    global _urlopener
78    if proxies is not None:
79        opener = FancyURLopener(proxies=proxies)
80    elif not _urlopener:
81        opener = FancyURLopener()
82        _urlopener = opener
83    else:
84        opener = _urlopener
85    if data is None:
86        return opener.open(url)
87    else:
88        return opener.open(url, data)
89def urlretrieve(url, filename=None, reporthook=None, data=None):
90    global _urlopener
91    if not _urlopener:
92        _urlopener = FancyURLopener()
93    return _urlopener.retrieve(url, filename, reporthook, data)
94def urlcleanup():
95    if _urlopener:
96        _urlopener.cleanup()
97    _safe_quoters.clear()
98    ftpcache.clear()
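
# A rough usage sketch for the three shortcuts above (the URL is only an
# example):
#
#     >>> f = urlopen('http://www.example.com/')
#     >>> data = f.read()
#     >>> f.info().gettype()     # e.g. 'text/html'
#     >>> f.close()
#     >>> filename, headers = urlretrieve('http://www.example.com/')
#     >>> urlcleanup()           # discard any temporary files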
99
100# check for SSL
101try:
102    import ssl
except ImportError:
104    _have_ssl = False
105else:
106    _have_ssl = True
107
108# exception raised when downloaded size does not match content-length
109class ContentTooShortError(IOError):
110    def __init__(self, message, content):
111        IOError.__init__(self, message)
112        self.content = content
113
114ftpcache = {}
115class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of error types 302 (relocated) and 401
    (authorization needed)."""
122
123    __tempfiles = None
124
125    version = "Python-urllib/%s" % __version__
126
127    # Constructor
128    def __init__(self, proxies=None, **x509):
129        if proxies is None:
130            proxies = getproxies()
131        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
132        self.proxies = proxies
133        self.key_file = x509.get('key_file')
134        self.cert_file = x509.get('cert_file')
135        self.addheaders = [('User-Agent', self.version)]
136        self.__tempfiles = []
137        self.__unlink = os.unlink # See cleanup()
138        self.tempcache = None
139        # Undocumented feature: if you assign {} to tempcache,
140        # it is used to cache files retrieved with
141        # self.retrieve().  This is not enabled by default
142        # since it does not work for changing documents (and I
143        # haven't got the logic to check expiration headers
144        # yet).
145        self.ftpcache = ftpcache
146        # Undocumented feature: you can use a different
147        # ftp cache by assigning to the .ftpcache member;
148        # in case you want logically independent URL openers
149        # XXX This is not threadsafe.  Bah.
150
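    # A sketch of constructing an opener with an explicit proxy mapping;
    # the proxy host below is hypothetical:
    #
    #     >>> opener = URLopener(proxies={'http': 'http://proxy.example.com:3128'})
    #     >>> f = opener.open('http://www.example.com/')
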
151    def __del__(self):
152        self.close()
153
154    def close(self):
155        self.cleanup()
156
157    def cleanup(self):
158        # This code sometimes runs when the rest of this module
159        # has already been deleted, so it can't use any globals
160        # or import anything.
161        if self.__tempfiles:
162            for file in self.__tempfiles:
163                try:
164                    self.__unlink(file)
165                except OSError:
166                    pass
167            del self.__tempfiles[:]
168        if self.tempcache:
169            self.tempcache.clear()
170
171    def addheader(self, *args):
172        """Add a header to be used by the HTTP interface only
173        e.g. u.addheader('Accept', 'sound/basic')"""
174        self.addheaders.append(args)
175
176    # External interface
177    def open(self, fullurl, data=None):
178        """Use URLopener().open(file) instead of open(file, 'r')."""
179        fullurl = unwrap(toBytes(fullurl))
        # Percent-encode the URL so that characters such as spaces within
        # URL paths do not trip up sloppy servers.
182        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
183        if self.tempcache and fullurl in self.tempcache:
184            filename, headers = self.tempcache[fullurl]
185            fp = open(filename, 'rb')
186            return addinfourl(fp, headers, fullurl)
187        urltype, url = splittype(fullurl)
188        if not urltype:
189            urltype = 'file'
190        if urltype in self.proxies:
191            proxy = self.proxies[urltype]
192            urltype, proxyhost = splittype(proxy)
193            host, selector = splithost(proxyhost)
194            url = (host, fullurl) # Signal special case to open_*()
195        else:
196            proxy = None
197        name = 'open_' + urltype
198        self.type = urltype
199        name = name.replace('-', '_')
200        if not hasattr(self, name):
201            if proxy:
202                return self.open_unknown_proxy(proxy, fullurl, data)
203            else:
204                return self.open_unknown(fullurl, data)
205        try:
206            if data is None:
207                return getattr(self, name)(url)
208            else:
209                return getattr(self, name)(url, data)
210        except socket.error, msg:
211            raise IOError, ('socket error', msg), sys.exc_info()[2]
212
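    # open() dispatches on the URL scheme: a scheme "foo" is handled by a
    # method named open_foo (dashes become underscores).  A subclass could
    # add a new scheme roughly like this (illustrative only):
    #
    #     class MyOpener(URLopener):
    #         def open_foo(self, url, data=None):
    #             ...  # return a file-like object wrapped with addinfourl()
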
213    def open_unknown(self, fullurl, data=None):
214        """Overridable interface to open unknown URL type."""
215        type, url = splittype(fullurl)
216        raise IOError, ('url error', 'unknown url type', type)
217
218    def open_unknown_proxy(self, proxy, fullurl, data=None):
219        """Overridable interface to open unknown URL type."""
220        type, url = splittype(fullurl)
221        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
222
223    # External interface
224    def retrieve(self, url, filename=None, reporthook=None, data=None):
225        """retrieve(url) returns (filename, headers) for a local object
226        or (tempfilename, headers) for a remote object."""
227        url = unwrap(toBytes(url))
228        if self.tempcache and url in self.tempcache:
229            return self.tempcache[url]
230        type, url1 = splittype(url)
231        if filename is None and (not type or type == 'file'):
232            try:
233                fp = self.open_local_file(url1)
234                hdrs = fp.info()
235                fp.close()
236                return url2pathname(splithost(url1)[1]), hdrs
237            except IOError:
238                pass
239        fp = self.open(url, data)
240        try:
241            headers = fp.info()
242            if filename:
243                tfp = open(filename, 'wb')
244            else:
245                import tempfile
246                garbage, path = splittype(url)
247                garbage, path = splithost(path or "")
248                path, garbage = splitquery(path or "")
249                path, garbage = splitattr(path or "")
250                suffix = os.path.splitext(path)[1]
251                (fd, filename) = tempfile.mkstemp(suffix)
252                self.__tempfiles.append(filename)
253                tfp = os.fdopen(fd, 'wb')
254            try:
255                result = filename, headers
256                if self.tempcache is not None:
257                    self.tempcache[url] = result
258                bs = 1024*8
259                size = -1
260                read = 0
261                blocknum = 0
262                if "content-length" in headers:
263                    size = int(headers["Content-Length"])
264                if reporthook:
265                    reporthook(blocknum, bs, size)
266                while 1:
267                    block = fp.read(bs)
268                    if block == "":
269                        break
270                    read += len(block)
271                    tfp.write(block)
272                    blocknum += 1
273                    if reporthook:
274                        reporthook(blocknum, bs, size)
275            finally:
276                tfp.close()
277        finally:
278            fp.close()
279
280        # raise exception if actual size does not match content-length header
281        if size >= 0 and read < size:
282            raise ContentTooShortError("retrieval incomplete: got only %i out "
283                                       "of %i bytes" % (read, size), result)
284
285        return result
286
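    # A sketch of retrieve() with a progress callback; the hook receives the
    # block count so far, the block size, and the total size (-1 when the
    # server sends no Content-Length):
    #
    #     >>> def hook(blocknum, bs, size):
    #     ...     print blocknum * bs, 'of', size, 'bytes'
    #     >>> opener = URLopener()
    #     >>> filename, headers = opener.retrieve('http://www.example.com/',
    #     ...                                     '/tmp/page.html', hook)
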
287    # Each method named open_<type> knows how to open that type of URL
288
289    def open_http(self, url, data=None):
290        """Use HTTP protocol."""
291        import httplib
292        user_passwd = None
293        proxy_passwd= None
294        if isinstance(url, str):
295            host, selector = splithost(url)
296            if host:
297                user_passwd, host = splituser(host)
298                host = unquote(host)
299            realhost = host
300        else:
301            host, selector = url
302            # check whether the proxy contains authorization information
303            proxy_passwd, host = splituser(host)
304            # now we proceed with the url we want to obtain
305            urltype, rest = splittype(selector)
306            url = rest
307            user_passwd = None
308            if urltype.lower() != 'http':
309                realhost = None
310            else:
311                realhost, rest = splithost(rest)
312                if realhost:
313                    user_passwd, realhost = splituser(realhost)
314                if user_passwd:
315                    selector = "%s://%s%s" % (urltype, realhost, rest)
316                if proxy_bypass(realhost):
317                    host = realhost
318
319            #print "proxy via http:", host, selector
320        if not host: raise IOError, ('http error', 'no host given')
321
322        if proxy_passwd:
323            proxy_passwd = unquote(proxy_passwd)
324            proxy_auth = base64.b64encode(proxy_passwd).strip()
325        else:
326            proxy_auth = None
327
328        if user_passwd:
329            user_passwd = unquote(user_passwd)
330            auth = base64.b64encode(user_passwd).strip()
331        else:
332            auth = None
333        h = httplib.HTTP(host)
334        if data is not None:
335            h.putrequest('POST', selector)
336            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
337            h.putheader('Content-Length', '%d' % len(data))
338        else:
339            h.putrequest('GET', selector)
340        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
341        if auth: h.putheader('Authorization', 'Basic %s' % auth)
342        if realhost: h.putheader('Host', realhost)
343        for args in self.addheaders: h.putheader(*args)
344        h.endheaders(data)
345        errcode, errmsg, headers = h.getreply()
346        fp = h.getfile()
347        if errcode == -1:
348            if fp: fp.close()
349            # something went wrong with the HTTP status line
350            raise IOError, ('http protocol error', 0,
351                            'got a bad status line', None)
352        # According to RFC 2616, "2xx" code indicates that the client's
353        # request was successfully received, understood, and accepted.
354        if (200 <= errcode < 300):
355            return addinfourl(fp, headers, "http:" + url, errcode)
356        else:
357            if data is None:
358                return self.http_error(url, fp, errcode, errmsg, headers)
359            else:
360                return self.http_error(url, fp, errcode, errmsg, headers, data)
361
362    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
363        """Handle http errors.
364        Derived class can override this, or provide specific handlers
365        named http_error_DDD where DDD is the 3-digit error code."""
366        # First check if there's a specific handler for this error
367        name = 'http_error_%d' % errcode
368        if hasattr(self, name):
369            method = getattr(self, name)
370            if data is None:
371                result = method(url, fp, errcode, errmsg, headers)
372            else:
373                result = method(url, fp, errcode, errmsg, headers, data)
374            if result: return result
375        return self.http_error_default(url, fp, errcode, errmsg, headers)
376
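    # For example, a subclass could make 404 responses return the error page
    # instead of raising an exception (a sketch mirroring what
    # FancyURLopener.http_error_default does for all codes):
    #
    #     class TolerantOpener(URLopener):
    #         def http_error_404(self, url, fp, errcode, errmsg, headers):
    #             return addinfourl(fp, headers, "http:" + url, errcode)
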
377    def http_error_default(self, url, fp, errcode, errmsg, headers):
378        """Default error handler: close the connection and raise IOError."""
379        fp.close()
380        raise IOError, ('http error', errcode, errmsg, headers)
381
382    if _have_ssl:
383        def open_https(self, url, data=None):
384            """Use HTTPS protocol."""
385
386            import httplib
387            user_passwd = None
388            proxy_passwd = None
389            if isinstance(url, str):
390                host, selector = splithost(url)
391                if host:
392                    user_passwd, host = splituser(host)
393                    host = unquote(host)
394                realhost = host
395            else:
396                host, selector = url
                # check whether the proxy contains authorization information
398                proxy_passwd, host = splituser(host)
399                urltype, rest = splittype(selector)
400                url = rest
401                user_passwd = None
402                if urltype.lower() != 'https':
403                    realhost = None
404                else:
405                    realhost, rest = splithost(rest)
406                    if realhost:
407                        user_passwd, realhost = splituser(realhost)
408                    if user_passwd:
409                        selector = "%s://%s%s" % (urltype, realhost, rest)
410                #print "proxy via https:", host, selector
411            if not host: raise IOError, ('https error', 'no host given')
412            if proxy_passwd:
413                proxy_passwd = unquote(proxy_passwd)
414                proxy_auth = base64.b64encode(proxy_passwd).strip()
415            else:
416                proxy_auth = None
417            if user_passwd:
418                user_passwd = unquote(user_passwd)
419                auth = base64.b64encode(user_passwd).strip()
420            else:
421                auth = None
422            h = httplib.HTTPS(host, 0,
423                              key_file=self.key_file,
424                              cert_file=self.cert_file)
425            if data is not None:
426                h.putrequest('POST', selector)
427                h.putheader('Content-Type',
428                            'application/x-www-form-urlencoded')
429                h.putheader('Content-Length', '%d' % len(data))
430            else:
431                h.putrequest('GET', selector)
432            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
433            if auth: h.putheader('Authorization', 'Basic %s' % auth)
434            if realhost: h.putheader('Host', realhost)
435            for args in self.addheaders: h.putheader(*args)
436            h.endheaders(data)
437            errcode, errmsg, headers = h.getreply()
438            fp = h.getfile()
439            if errcode == -1:
440                if fp: fp.close()
441                # something went wrong with the HTTP status line
442                raise IOError, ('http protocol error', 0,
443                                'got a bad status line', None)
444            # According to RFC 2616, "2xx" code indicates that the client's
445            # request was successfully received, understood, and accepted.
446            if (200 <= errcode < 300):
447                return addinfourl(fp, headers, "https:" + url, errcode)
448            else:
449                if data is None:
450                    return self.http_error(url, fp, errcode, errmsg, headers)
451                else:
452                    return self.http_error(url, fp, errcode, errmsg, headers,
453                                           data)
454
455    def open_file(self, url):
456        """Use local file or FTP depending on form of URL."""
457        if not isinstance(url, str):
458            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
459        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
460            return self.open_ftp(url)
461        else:
462            return self.open_local_file(url)
463
464    def open_local_file(self, url):
465        """Use local file."""
466        import mimetypes, mimetools, email.utils
467        try:
468            from cStringIO import StringIO
469        except ImportError:
470            from StringIO import StringIO
471        host, file = splithost(url)
472        localname = url2pathname(file)
473        try:
474            stats = os.stat(localname)
475        except OSError, e:
476            raise IOError(e.errno, e.strerror, e.filename)
477        size = stats.st_size
478        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
479        mtype = mimetypes.guess_type(url)[0]
480        headers = mimetools.Message(StringIO(
481            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
482            (mtype or 'text/plain', size, modified)))
483        if not host:
484            urlfile = file
485            if file[:1] == '/':
486                urlfile = 'file://' + file
487            return addinfourl(open(localname, 'rb'),
488                              headers, urlfile)
489        host, port = splitport(host)
490        if not port \
491           and socket.gethostbyname(host) in (localhost(), thishost()):
492            urlfile = file
493            if file[:1] == '/':
494                urlfile = 'file://' + file
495            return addinfourl(open(localname, 'rb'),
496                              headers, urlfile)
497        raise IOError, ('local file error', 'not on local host')
498
499    def open_ftp(self, url):
500        """Use FTP protocol."""
501        if not isinstance(url, str):
502            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
503        import mimetypes, mimetools
504        try:
505            from cStringIO import StringIO
506        except ImportError:
507            from StringIO import StringIO
508        host, path = splithost(url)
509        if not host: raise IOError, ('ftp error', 'no host given')
510        host, port = splitport(host)
511        user, host = splituser(host)
512        if user: user, passwd = splitpasswd(user)
513        else: passwd = None
514        host = unquote(host)
515        user = user or ''
516        passwd = passwd or ''
517        host = socket.gethostbyname(host)
518        if not port:
519            import ftplib
520            port = ftplib.FTP_PORT
521        else:
522            port = int(port)
523        path, attrs = splitattr(path)
524        path = unquote(path)
525        dirs = path.split('/')
526        dirs, file = dirs[:-1], dirs[-1]
527        if dirs and not dirs[0]: dirs = dirs[1:]
528        if dirs and not dirs[0]: dirs[0] = '/'
529        key = user, host, port, '/'.join(dirs)
530        # XXX thread unsafe!
531        if len(self.ftpcache) > MAXFTPCACHE:
532            # Prune the cache, rather arbitrarily
533            for k in self.ftpcache.keys():
534                if k != key:
535                    v = self.ftpcache[k]
536                    del self.ftpcache[k]
537                    v.close()
538        try:
539            if not key in self.ftpcache:
540                self.ftpcache[key] = \
541                    ftpwrapper(user, passwd, host, port, dirs)
542            if not file: type = 'D'
543            else: type = 'I'
544            for attr in attrs:
545                attr, value = splitvalue(attr)
546                if attr.lower() == 'type' and \
547                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
548                    type = value.upper()
549            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
550            mtype = mimetypes.guess_type("ftp:" + url)[0]
551            headers = ""
552            if mtype:
553                headers += "Content-Type: %s\n" % mtype
554            if retrlen is not None and retrlen >= 0:
555                headers += "Content-Length: %d\n" % retrlen
556            headers = mimetools.Message(StringIO(headers))
557            return addinfourl(fp, headers, "ftp:" + url)
558        except ftperrors(), msg:
559            raise IOError, ('ftp error', msg), sys.exc_info()[2]
560
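    # FTP URLs may carry a transfer-type attribute; with a hypothetical host:
    #
    #     ftp://ftp.example.com/pub/notes.txt;type=a   ASCII ('A') transfer
    #     ftp://ftp.example.com/pub/image.png;type=i   binary ('I') transfer
    #     ftp://ftp.example.com/pub/;type=d            directory listing
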
561    def open_data(self, url, data=None):
562        """Use "data" URL."""
563        if not isinstance(url, str):
564            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
565        # ignore POSTed data
566        #
567        # syntax of data URLs:
568        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
569        # mediatype := [ type "/" subtype ] *( ";" parameter )
570        # data      := *urlchar
571        # parameter := attribute "=" value
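        #
        # For example, "data:text/plain;base64,aGVsbG8=" yields a response
        # whose body is the five bytes 'hello' and whose Content-type is
        # text/plain.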
572        import mimetools
573        try:
574            from cStringIO import StringIO
575        except ImportError:
576            from StringIO import StringIO
577        try:
578            [type, data] = url.split(',', 1)
579        except ValueError:
580            raise IOError, ('data error', 'bad data URL')
581        if not type:
582            type = 'text/plain;charset=US-ASCII'
583        semi = type.rfind(';')
584        if semi >= 0 and '=' not in type[semi:]:
585            encoding = type[semi+1:]
586            type = type[:semi]
587        else:
588            encoding = ''
589        msg = []
590        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
591                                            time.gmtime(time.time())))
592        msg.append('Content-type: %s' % type)
593        if encoding == 'base64':
594            data = base64.decodestring(data)
595        else:
596            data = unquote(data)
597        msg.append('Content-Length: %d' % len(data))
598        msg.append('')
599        msg.append(data)
600        msg = '\n'.join(msg)
601        f = StringIO(msg)
602        headers = mimetools.Message(f, 0)
603        #f.fileno = None     # needed for addinfourl
604        return addinfourl(f, headers, url)
605
606
607class FancyURLopener(URLopener):
608    """Derived class with handlers for errors we can handle (perhaps)."""
609
610    def __init__(self, *args, **kwargs):
611        URLopener.__init__(self, *args, **kwargs)
612        self.auth_cache = {}
613        self.tries = 0
614        self.maxtries = 10
615
616    def http_error_default(self, url, fp, errcode, errmsg, headers):
617        """Default error handling -- don't raise an exception."""
618        return addinfourl(fp, headers, "http:" + url, errcode)
619
620    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
621        """Error 302 -- relocated (temporarily)."""
622        self.tries += 1
623        if self.maxtries and self.tries >= self.maxtries:
624            if hasattr(self, "http_error_500"):
625                meth = self.http_error_500
626            else:
627                meth = self.http_error_default
628            self.tries = 0
629            return meth(url, fp, 500,
630                        "Internal Server Error: Redirect Recursion", headers)
631        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
632                                        data)
633        self.tries = 0
634        return result
635
636    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
637        if 'location' in headers:
638            newurl = headers['location']
639        elif 'uri' in headers:
640            newurl = headers['uri']
641        else:
642            return
643        fp.close()
644        # In case the server sent a relative URL, join with original:
645        newurl = basejoin(self.type + ":" + url, newurl)
646
647        # For security reasons we do not allow redirects to protocols
648        # other than HTTP, HTTPS or FTP.
649        newurl_lower = newurl.lower()
650        if not (newurl_lower.startswith('http://') or
651                newurl_lower.startswith('https://') or
652                newurl_lower.startswith('ftp://')):
653            raise IOError('redirect error', errcode,
654                          errmsg + " - Redirection to url '%s' is not allowed" %
655                          newurl,
656                          headers)
657
658        return self.open(newurl)
659
660    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
661        """Error 301 -- also relocated (permanently)."""
662        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
663
664    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
665        """Error 303 -- also relocated (essentially identical to 302)."""
666        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
667
668    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
669        """Error 307 -- relocated, but turn POST into error."""
670        if data is None:
671            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
672        else:
673            return self.http_error_default(url, fp, errcode, errmsg, headers)
674
675    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
676        """Error 401 -- authentication required.
677        This function supports Basic authentication only."""
678        if not 'www-authenticate' in headers:
679            URLopener.http_error_default(self, url, fp,
680                                         errcode, errmsg, headers)
681        stuff = headers['www-authenticate']
682        import re
683        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
684        if not match:
685            URLopener.http_error_default(self, url, fp,
686                                         errcode, errmsg, headers)
687        scheme, realm = match.groups()
688        if scheme.lower() != 'basic':
689            URLopener.http_error_default(self, url, fp,
690                                         errcode, errmsg, headers)
691        name = 'retry_' + self.type + '_basic_auth'
692        if data is None:
693            return getattr(self,name)(url, realm)
694        else:
695            return getattr(self,name)(url, realm, data)
696
697    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
698        """Error 407 -- proxy authentication required.
699        This function supports Basic authentication only."""
700        if not 'proxy-authenticate' in headers:
701            URLopener.http_error_default(self, url, fp,
702                                         errcode, errmsg, headers)
703        stuff = headers['proxy-authenticate']
704        import re
705        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
706        if not match:
707            URLopener.http_error_default(self, url, fp,
708                                         errcode, errmsg, headers)
709        scheme, realm = match.groups()
710        if scheme.lower() != 'basic':
711            URLopener.http_error_default(self, url, fp,
712                                         errcode, errmsg, headers)
713        name = 'retry_proxy_' + self.type + '_basic_auth'
714        if data is None:
715            return getattr(self,name)(url, realm)
716        else:
717            return getattr(self,name)(url, realm, data)
718
719    def retry_proxy_http_basic_auth(self, url, realm, data=None):
720        host, selector = splithost(url)
721        newurl = 'http://' + host + selector
722        proxy = self.proxies['http']
723        urltype, proxyhost = splittype(proxy)
724        proxyhost, proxyselector = splithost(proxyhost)
725        i = proxyhost.find('@') + 1
726        proxyhost = proxyhost[i:]
727        user, passwd = self.get_user_passwd(proxyhost, realm, i)
728        if not (user or passwd): return None
729        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
730        self.proxies['http'] = 'http://' + proxyhost + proxyselector
731        if data is None:
732            return self.open(newurl)
733        else:
734            return self.open(newurl, data)
735
736    def retry_proxy_https_basic_auth(self, url, realm, data=None):
737        host, selector = splithost(url)
738        newurl = 'https://' + host + selector
739        proxy = self.proxies['https']
740        urltype, proxyhost = splittype(proxy)
741        proxyhost, proxyselector = splithost(proxyhost)
742        i = proxyhost.find('@') + 1
743        proxyhost = proxyhost[i:]
744        user, passwd = self.get_user_passwd(proxyhost, realm, i)
745        if not (user or passwd): return None
746        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
747        self.proxies['https'] = 'https://' + proxyhost + proxyselector
748        if data is None:
749            return self.open(newurl)
750        else:
751            return self.open(newurl, data)
752
753    def retry_http_basic_auth(self, url, realm, data=None):
754        host, selector = splithost(url)
755        i = host.find('@') + 1
756        host = host[i:]
757        user, passwd = self.get_user_passwd(host, realm, i)
758        if not (user or passwd): return None
759        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
760        newurl = 'http://' + host + selector
761        if data is None:
762            return self.open(newurl)
763        else:
764            return self.open(newurl, data)
765
766    def retry_https_basic_auth(self, url, realm, data=None):
767        host, selector = splithost(url)
768        i = host.find('@') + 1
769        host = host[i:]
770        user, passwd = self.get_user_passwd(host, realm, i)
771        if not (user or passwd): return None
772        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
773        newurl = 'https://' + host + selector
774        if data is None:
775            return self.open(newurl)
776        else:
777            return self.open(newurl, data)
778
779    def get_user_passwd(self, host, realm, clear_cache=0):
780        key = realm + '@' + host.lower()
781        if key in self.auth_cache:
782            if clear_cache:
783                del self.auth_cache[key]
784            else:
785                return self.auth_cache[key]
786        user, passwd = self.prompt_user_passwd(host, realm)
787        if user or passwd: self.auth_cache[key] = (user, passwd)
788        return user, passwd
789
790    def prompt_user_passwd(self, host, realm):
791        """Override this in a GUI environment!"""
792        import getpass
793        try:
794            user = raw_input("Enter username for %s at %s: " % (realm,
795                                                                host))
796            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
797                (user, realm, host))
798            return user, passwd
799        except KeyboardInterrupt:
800            print
801            return None, None
802
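# A GUI application would typically subclass FancyURLopener and override
# prompt_user_passwd() to pop up a dialog rather than read from the terminal.
# A non-interactive sketch with made-up credentials:
#
#     class AutomatedOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'someuser', 'somepassword'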
803
804# Utility functions
805
806_localhost = None
807def localhost():
808    """Return the IP address of the magic hostname 'localhost'."""
809    global _localhost
810    if _localhost is None:
811        _localhost = socket.gethostbyname('localhost')
812    return _localhost
813
814_thishost = None
815def thishost():
816    """Return the IP address of the current host."""
817    global _thishost
818    if _thishost is None:
819        _thishost = socket.gethostbyname(socket.gethostname())
820    return _thishost
821
822_ftperrors = None
823def ftperrors():
824    """Return the set of errors raised by the FTP class."""
825    global _ftperrors
826    if _ftperrors is None:
827        import ftplib
828        _ftperrors = ftplib.all_errors
829    return _ftperrors
830
831_noheaders = None
832def noheaders():
833    """Return an empty mimetools.Message object."""
834    global _noheaders
835    if _noheaders is None:
836        import mimetools
837        try:
838            from cStringIO import StringIO
839        except ImportError:
840            from StringIO import StringIO
841        _noheaders = mimetools.Message(StringIO(), 0)
842        _noheaders.fp.close()   # Recycle file descriptor
843    return _noheaders
844
845
846# Utility classes
847
848class ftpwrapper:
849    """Class used by open_ftp() for cache of open FTP connections."""
850
851    def __init__(self, user, passwd, host, port, dirs,
852                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
853                 persistent=True):
854        self.user = user
855        self.passwd = passwd
856        self.host = host
857        self.port = port
858        self.dirs = dirs
859        self.timeout = timeout
860        self.refcount = 0
861        self.keepalive = persistent
862        self.init()
863
864    def init(self):
865        import ftplib
866        self.busy = 0
867        self.ftp = ftplib.FTP()
868        self.ftp.connect(self.host, self.port, self.timeout)
869        self.ftp.login(self.user, self.passwd)
870        for dir in self.dirs:
871            self.ftp.cwd(dir)
872
873    def retrfile(self, file, type):
874        import ftplib
875        self.endtransfer()
876        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
877        else: cmd = 'TYPE ' + type; isdir = 0
878        try:
879            self.ftp.voidcmd(cmd)
880        except ftplib.all_errors:
881            self.init()
882            self.ftp.voidcmd(cmd)
883        conn = None
884        if file and not isdir:
885            # Try to retrieve as a file
886            try:
887                cmd = 'RETR ' + file
888                conn, retrlen = self.ftp.ntransfercmd(cmd)
889            except ftplib.error_perm, reason:
890                if str(reason)[:3] != '550':
891                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
892        if not conn:
893            # Set transfer mode to ASCII!
894            self.ftp.voidcmd('TYPE A')
895            # Try a directory listing. Verify that directory exists.
896            if file:
897                pwd = self.ftp.pwd()
898                try:
899                    try:
900                        self.ftp.cwd(file)
901                    except ftplib.error_perm, reason:
902                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
903                finally:
904                    self.ftp.cwd(pwd)
905                cmd = 'LIST ' + file
906            else:
907                cmd = 'LIST'
908            conn, retrlen = self.ftp.ntransfercmd(cmd)
909        self.busy = 1
910        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
911        self.refcount += 1
912        conn.close()
913        # Pass back both a suitably decorated object and a retrieval length
914        return (ftpobj, retrlen)
915
916    def endtransfer(self):
917        if not self.busy:
918            return
919        self.busy = 0
920        try:
921            self.ftp.voidresp()
922        except ftperrors():
923            pass
924
925    def close(self):
926        self.keepalive = False
927        if self.refcount <= 0:
928            self.real_close()
929
930    def file_close(self):
931        self.endtransfer()
932        self.refcount -= 1
933        if self.refcount <= 0 and not self.keepalive:
934            self.real_close()
935
936    def real_close(self):
937        self.endtransfer()
938        try:
939            self.ftp.close()
940        except ftperrors():
941            pass
942
943class addbase:
944    """Base class for addinfo and addclosehook."""
945
946    def __init__(self, fp):
947        self.fp = fp
948        self.read = self.fp.read
949        self.readline = self.fp.readline
950        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
951        if hasattr(self.fp, "fileno"):
952            self.fileno = self.fp.fileno
953        else:
954            self.fileno = lambda: None
955        if hasattr(self.fp, "__iter__"):
956            self.__iter__ = self.fp.__iter__
957            if hasattr(self.fp, "next"):
958                self.next = self.fp.next
959
960    def __repr__(self):
961        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
962                                             id(self), self.fp)
963
964    def close(self):
965        self.read = None
966        self.readline = None
967        self.readlines = None
968        self.fileno = None
969        if self.fp: self.fp.close()
970        self.fp = None
971
972class addclosehook(addbase):
973    """Class to add a close hook to an open file."""
974
975    def __init__(self, fp, closehook, *hookargs):
976        addbase.__init__(self, fp)
977        self.closehook = closehook
978        self.hookargs = hookargs
979
980    def close(self):
981        addbase.close(self)
982        if self.closehook:
983            self.closehook(*self.hookargs)
984            self.closehook = None
985            self.hookargs = None
986
987class addinfo(addbase):
988    """class to add an info() method to an open file."""
989
990    def __init__(self, fp, headers):
991        addbase.__init__(self, fp)
992        self.headers = headers
993
994    def info(self):
995        return self.headers
996
997class addinfourl(addbase):
998    """class to add info() and geturl() methods to an open file."""
999
1000    def __init__(self, fp, headers, url, code=None):
1001        addbase.__init__(self, fp)
1002        self.headers = headers
1003        self.url = url
1004        self.code = code
1005
1006    def info(self):
1007        return self.headers
1008
1009    def getcode(self):
1010        return self.code
1011
1012    def geturl(self):
1013        return self.url
1014
1015
1016# Utilities to parse URLs (most of these return None for missing parts):
1017# unwrap('<URL:type://host/path>') --> 'type://host/path'
1018# splittype('type:opaquestring') --> 'type', 'opaquestring'
1019# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1020# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1021# splitpasswd('user:passwd') -> 'user', 'passwd'
1022# splitport('host:port') --> 'host', 'port'
1023# splitquery('/path?query') --> '/path', 'query'
1024# splittag('/path#tag') --> '/path', 'tag'
1025# splitattr('/path;attr1=value1;attr2=value2;...') ->
1026#   '/path', ['attr1=value1', 'attr2=value2', ...]
1027# splitvalue('attr=value') --> 'attr', 'value'
1028# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1030
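# Chained together, these take a complete URL apart; the URL below is
# illustrative:
#
#     >>> splittype('http://user:pw@www.example.com:8080/path?q=1')
#     ('http', '//user:pw@www.example.com:8080/path?q=1')
#     >>> splithost('//user:pw@www.example.com:8080/path?q=1')
#     ('user:pw@www.example.com:8080', '/path?q=1')
#     >>> splituser('user:pw@www.example.com:8080')
#     ('user:pw', 'www.example.com:8080')
#     >>> splitport('www.example.com:8080')
#     ('www.example.com', '8080')
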
1031try:
1032    unicode
1033except NameError:
1034    def _is_unicode(x):
1035        return 0
1036else:
1037    def _is_unicode(x):
1038        return isinstance(x, unicode)
1039
1040def toBytes(url):
1041    """toBytes(u"URL") --> 'URL'."""
1042    # Most URL schemes require ASCII. If that changes, the conversion
1043    # can be relaxed
1044    if _is_unicode(url):
1045        try:
1046            url = url.encode("ASCII")
1047        except UnicodeError:
1048            raise UnicodeError("URL " + repr(url) +
1049                               " contains non-ASCII characters")
1050    return url
1051
1052def unwrap(url):
1053    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1054    url = url.strip()
1055    if url[:1] == '<' and url[-1:] == '>':
1056        url = url[1:-1].strip()
1057    if url[:4] == 'URL:': url = url[4:].strip()
1058    return url
1059
1060_typeprog = None
1061def splittype(url):
1062    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1063    global _typeprog
1064    if _typeprog is None:
1065        import re
1066        _typeprog = re.compile('^([^/:]+):')
1067
1068    match = _typeprog.match(url)
1069    if match:
1070        scheme = match.group(1)
1071        return scheme.lower(), url[len(scheme) + 1:]
1072    return None, url
1073
1074_hostprog = None
1075def splithost(url):
1076    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1077    global _hostprog
1078    if _hostprog is None:
1079        import re
1080        _hostprog = re.compile('^//([^/?]*)(.*)$')
1081
1082    match = _hostprog.match(url)
1083    if match:
1084        host_port = match.group(1)
1085        path = match.group(2)
1086        if path and not path.startswith('/'):
1087            path = '/' + path
1088        return host_port, path
1089    return None, url
1090
1091_userprog = None
1092def splituser(host):
1093    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1094    global _userprog
1095    if _userprog is None:
1096        import re
1097        _userprog = re.compile('^(.*)@(.*)$')
1098
1099    match = _userprog.match(host)
1100    if match: return match.group(1, 2)
1101    return None, host
1102
1103_passwdprog = None
1104def splitpasswd(user):
1105    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1106    global _passwdprog
1107    if _passwdprog is None:
1108        import re
1109        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1110
1111    match = _passwdprog.match(user)
1112    if match: return match.group(1, 2)
1113    return user, None
1114
1115# splittag('/path#tag') --> '/path', 'tag'
1116_portprog = None
1117def splitport(host):
1118    """splitport('host:port') --> 'host', 'port'."""
1119    global _portprog
1120    if _portprog is None:
1121        import re
1122        _portprog = re.compile('^(.*):([0-9]+)$')
1123
1124    match = _portprog.match(host)
1125    if match: return match.group(1, 2)
1126    return host, None
1127
1128_nportprog = None
1129def splitnport(host, defport=-1):
1130    """Split host and port, returning numeric port.
1131    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
1133    Return None if ':' but not a valid number."""
1134    global _nportprog
1135    if _nportprog is None:
1136        import re
1137        _nportprog = re.compile('^(.*):(.*)$')
1138
1139    match = _nportprog.match(host)
1140    if match:
1141        host, port = match.group(1, 2)
1142        try:
1143            if not port: raise ValueError, "no digits"
1144            nport = int(port)
1145        except ValueError:
1146            nport = None
1147        return host, nport
1148    return host, defport
1149
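# A short sketch of the three cases described above:
#
#     >>> splitnport('www.example.com:8080')
#     ('www.example.com', 8080)
#     >>> splitnport('www.example.com')
#     ('www.example.com', -1)
#     >>> splitnport('www.example.com:foo')
#     ('www.example.com', None)
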
1150_queryprog = None
1151def splitquery(url):
1152    """splitquery('/path?query') --> '/path', 'query'."""
1153    global _queryprog
1154    if _queryprog is None:
1155        import re
1156        _queryprog = re.compile('^(.*)\?([^?]*)$')
1157
1158    match = _queryprog.match(url)
1159    if match: return match.group(1, 2)
1160    return url, None
1161
1162_tagprog = None
1163def splittag(url):
1164    """splittag('/path#tag') --> '/path', 'tag'."""
1165    global _tagprog
1166    if _tagprog is None:
1167        import re
1168        _tagprog = re.compile('^(.*)#([^#]*)$')
1169
1170    match = _tagprog.match(url)
1171    if match: return match.group(1, 2)
1172    return url, None
1173
1174def splitattr(url):
1175    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1176        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1177    words = url.split(';')
1178    return words[0], words[1:]
1179
1180_valueprog = None
1181def splitvalue(attr):
1182    """splitvalue('attr=value') --> 'attr', 'value'."""
1183    global _valueprog
1184    if _valueprog is None:
1185        import re
1186        _valueprog = re.compile('^([^=]*)=(.*)$')
1187
1188    match = _valueprog.match(attr)
1189    if match: return match.group(1, 2)
1190    return attr, None
1191
1192# urlparse contains a duplicate of this method to avoid a circular import.  If
1193# you update this method, also update the copy in urlparse.  This code
1194# duplication does not exist in Python3.
1195
1196_hexdig = '0123456789ABCDEFabcdef'
1197_hextochr = dict((a + b, chr(int(a + b, 16)))
1198                 for a in _hexdig for b in _hexdig)
1199
1200def unquote(s):
1201    """unquote('abc%20def') -> 'abc def'."""
1202    res = s.split('%')
1203    # fastpath
1204    if len(res) == 1:
1205        return s
1206    s = res[0]
1207    for item in res[1:]:
1208        try:
1209            s += _hextochr[item[:2]] + item[2:]
1210        except KeyError:
1211            s += '%' + item
1212        except UnicodeDecodeError:
1213            s += unichr(int(item[:2], 16)) + item[2:]
1214    return s
1215
1216def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'."""
1218    s = s.replace('+', ' ')
1219    return unquote(s)
1220
1221always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1222               'abcdefghijklmnopqrstuvwxyz'
1223               '0123456789' '_.-')
1224_safe_map = {}
1225for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1226    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1227_safe_quoters = {}
1228
1229def quote(s, safe='/'):
1230    """quote('abc def') -> 'abc%20def'
1231
1232    Each part of a URL, e.g. the path info, the query, etc., has a
1233    different set of reserved characters that must be quoted.
1234
1235    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1236    the following reserved characters.
1237
1238    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1239                  "$" | ","
1240
1241    Each of these characters is reserved in some component of a URL,
1242    but not necessarily in all of them.
1243
1244    By default, the quote function is intended for quoting the path
1245    section of a URL.  Thus, it will not encode '/'.  This character
1246    is reserved, but in typical usage the quote function is being
1247    called on a path where the existing slash characters are used as
1248    reserved characters.
1249    """
1250    # fastpath
1251    if not s:
1252        if s is None:
1253            raise TypeError('None object cannot be quoted')
1254        return s
1255    cachekey = (safe, always_safe)
1256    try:
1257        (quoter, safe) = _safe_quoters[cachekey]
1258    except KeyError:
1259        safe_map = _safe_map.copy()
1260        safe_map.update([(c, c) for c in safe])
1261        quoter = safe_map.__getitem__
1262        safe = always_safe + safe
1263        _safe_quoters[cachekey] = (quoter, safe)
1264    if not s.rstrip(safe):
1265        return s
1266    return ''.join(map(quoter, s))
1267
1268def quote_plus(s, safe=''):
1269    """Quote the query fragment of a URL; replacing ' ' with '+'"""
1270    if ' ' in s:
1271        s = quote(s, safe + ' ')
1272        return s.replace(' ', '+')
1273    return quote(s, safe)
1274
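# A few illustrative cases (note the different default 'safe' sets):
#
#     >>> quote('/~user/file name.txt')
#     '/%7Euser/file%20name.txt'
#     >>> quote('/~user/file name.txt', safe='/~')
#     '/~user/file%20name.txt'
#     >>> quote_plus('file name.txt')
#     'file+name.txt'
#     >>> quote_plus('a&b=c')
#     'a%26b%3Dc'
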
1275def urlencode(query, doseq=0):
1276    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1277
1278    If any values in the query arg are sequences and doseq is true, each
1279    sequence element is converted to a separate parameter.
1280
1281    If the query arg is a sequence of two-element tuples, the order of the
1282    parameters in the output will match the order of parameters in the
1283    input.
1284    """
1285
1286    if hasattr(query,"items"):
1287        # mapping objects
1288        query = query.items()
1289    else:
1290        # it's a bother at times that strings and string-like objects are
1291        # sequences...
1292        try:
1293            # non-sequence items should not work with len()
1294            # non-empty strings will fail this
1295            if len(query) and not isinstance(query[0], tuple):
1296                raise TypeError
1297            # zero-length sequences of all types will get here and succeed,
1298            # but that's a minor nit - since the original implementation
1299            # allowed empty dicts that type of behavior probably should be
1300            # preserved for consistency
1301        except TypeError:
1302            ty,va,tb = sys.exc_info()
1303            raise TypeError, "not a valid non-string sequence or mapping object", tb
1304
1305    l = []
1306    if not doseq:
1307        # preserve old behavior
1308        for k, v in query:
1309            k = quote_plus(str(k))
1310            v = quote_plus(str(v))
1311            l.append(k + '=' + v)
1312    else:
1313        for k, v in query:
1314            k = quote_plus(str(k))
1315            if isinstance(v, str):
1316                v = quote_plus(v)
1317                l.append(k + '=' + v)
1318            elif _is_unicode(v):
1319                # is there a reasonable way to convert to ASCII?
1320                # encode generates a string, but "replace" or "ignore"
1321                # lose information and "strict" can raise UnicodeError
1322                v = quote_plus(v.encode("ASCII","replace"))
1323                l.append(k + '=' + v)
1324            else:
1325                try:
1326                    # is this a sufficient test for sequence-ness?
1327                    len(v)
1328                except TypeError:
1329                    # not a sequence
1330                    v = quote_plus(str(v))
1331                    l.append(k + '=' + v)
1332                else:
1333                    # loop over the sequence
1334                    for elt in v:
1335                        l.append(k + '=' + quote_plus(str(elt)))
1336    return '&'.join(l)
1337
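# A rough sketch (with a dict argument the parameter order is not guaranteed):
#
#     >>> urlencode([('q', 'python urllib'), ('page', 2)])
#     'q=python+urllib&page=2'
#     >>> urlencode({'tag': ['a', 'b']}, doseq=1)
#     'tag=a&tag=b'
#     >>> urlencode({'tag': ['a', 'b']})      # doseq off: the list is str()'d
#     'tag=%5B%27a%27%2C+%27b%27%5D'
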
1338# Proxy handling
1339def getproxies_environment():
1340    """Return a dictionary of scheme -> proxy server URL mappings.
1341
1342    Scan the environment for variables named <scheme>_proxy;
1343    this seems to be the standard convention.  If you need a
1344    different way, you can pass a proxies dictionary to the
1345    [Fancy]URLopener constructor.
1346
1347    """
1348    proxies = {}
1349    for name, value in os.environ.items():
1350        name = name.lower()
1351        if value and name[-6:] == '_proxy':
1352            proxies[name[:-6]] = value
1353    return proxies
1354
1355def proxy_bypass_environment(host):
1356    """Test if proxies should not be used for a particular host.
1357
1358    Checks the environment for a variable named no_proxy, which should
1359    be a list of DNS suffixes separated by commas, or '*' for all hosts.
1360    """
1361    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1362    # '*' is special case for always bypass
1363    if no_proxy == '*':
1364        return 1
1365    # strip port off host
1366    hostonly, port = splitport(host)
1367    # check if the host ends with any of the DNS suffixes
1368    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
1369    for name in no_proxy_list:
1370        if name and (hostonly.endswith(name) or host.endswith(name)):
1371            return 1
1372    # otherwise, don't bypass
1373    return 0
1374
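# For example, with environment settings along these lines (values are
# illustrative):
#
#     http_proxy=http://proxy.example.com:3128/
#     no_proxy=.example.org,localhost
#
# getproxies_environment() returns {'http': 'http://proxy.example.com:3128/'}
# and proxy_bypass_environment('www.example.org') returns 1.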
1375
1376if sys.platform == 'darwin':
1377    from _scproxy import _get_proxy_settings, _get_proxies
1378
1379    def proxy_bypass_macosx_sysconf(host):
1380        """
1381        Return True iff this host shouldn't be accessed using a proxy
1382
1383        This function uses the MacOSX framework SystemConfiguration
1384        to fetch the proxy information.
1385        """
1386        import re
1387        import socket
1388        from fnmatch import fnmatch
1389
1390        hostonly, port = splitport(host)
1391
1392        def ip2num(ipAddr):
1393            parts = ipAddr.split('.')
1394            parts = map(int, parts)
1395            if len(parts) != 4:
1396                parts = (parts + [0, 0, 0, 0])[:4]
1397            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1398
1399        proxy_settings = _get_proxy_settings()
1400
1401        # Check for simple host names:
1402        if '.' not in host:
1403            if proxy_settings['exclude_simple']:
1404                return True
1405
1406        hostIP = None
1407
1408        for value in proxy_settings.get('exceptions', ()):
1409            # Items in the list are strings like these: *.local, 169.254/16
1410            if not value: continue
1411
1412            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1413            if m is not None:
1414                if hostIP is None:
1415                    try:
1416                        hostIP = socket.gethostbyname(hostonly)
1417                        hostIP = ip2num(hostIP)
1418                    except socket.error:
1419                        continue
1420
1421                base = ip2num(m.group(1))
1422                mask = m.group(2)
1423                if mask is None:
1424                    mask = 8 * (m.group(1).count('.') + 1)
1425
1426                else:
1427                    mask = int(mask[1:])
1428                mask = 32 - mask
1429
1430                if (hostIP >> mask) == (base >> mask):
1431                    return True
1432
1433            elif fnmatch(host, value):
1434                return True
1435
1436        return False
1437
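    # Worked example of the exception matching above: for an entry like
    # "169.254/16", base = ip2num('169.254') = 0xA9FE0000 and the prefix is
    # 16 bits, so both addresses are shifted right by 32 - 16 = 16 bits
    # before comparison; any host resolving to 169.254.x.y therefore matches.
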
1438    def getproxies_macosx_sysconf():
1439        """Return a dictionary of scheme -> proxy server URL mappings.
1440
1441        This function uses the MacOSX framework SystemConfiguration
1442        to fetch the proxy information.
1443        """
1444        return _get_proxies()
1445
1446    def proxy_bypass(host):
1447        if getproxies_environment():
1448            return proxy_bypass_environment(host)
1449        else:
1450            return proxy_bypass_macosx_sysconf(host)
1451
1452    def getproxies():
1453        return getproxies_environment() or getproxies_macosx_sysconf()
1454
1455elif os.name == 'nt':
1456    def getproxies_registry():
1457        """Return a dictionary of scheme -> proxy server URL mappings.
1458
1459        Win32 uses the registry to store proxies.
1460
1461        """
1462        proxies = {}
1463        try:
1464            import _winreg
1465        except ImportError:
1466            # Std module, so should be around - but you never know!
1467            return proxies
1468        try:
1469            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1470                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1471            proxyEnable = _winreg.QueryValueEx(internetSettings,
1472                                               'ProxyEnable')[0]
1473            if proxyEnable:
1474                # Returned as Unicode but problems if not converted to ASCII
1475                proxyServer = str(_winreg.QueryValueEx(internetSettings,
1476                                                       'ProxyServer')[0])
1477                if '=' in proxyServer:
1478                    # Per-protocol settings
1479                    for p in proxyServer.split(';'):
1480                        protocol, address = p.split('=', 1)
1481                        # See if address has a type:// prefix
1482                        import re
1483                        if not re.match('^([^/:]+)://', address):
1484                            address = '%s://%s' % (protocol, address)
1485                        proxies[protocol] = address
1486                else:
1487                    # Use one setting for all protocols
1488                    if proxyServer[:5] == 'http:':
1489                        proxies['http'] = proxyServer
1490                    else:
1491                        proxies['http'] = 'http://%s' % proxyServer
1492                        proxies['https'] = 'https://%s' % proxyServer
1493                        proxies['ftp'] = 'ftp://%s' % proxyServer
1494            internetSettings.Close()
1495        except (WindowsError, ValueError, TypeError):
1496            # Either registry key not found etc, or the value in an
1497            # unexpected format.
1498            # proxies already set up to be empty so nothing to do
1499            pass
1500        return proxies
1501
1502    def getproxies():
1503        """Return a dictionary of scheme -> proxy server URL mappings.
1504
1505        Returns settings gathered from the environment, if specified,
1506        or the registry.
1507
1508        """
1509        return getproxies_environment() or getproxies_registry()
1510
1511    def proxy_bypass_registry(host):
1512        try:
1513            import _winreg
1514            import re
1515        except ImportError:
1516            # Std modules, so should be around - but you never know!
1517            return 0
1518        try:
1519            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1520                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1521            proxyEnable = _winreg.QueryValueEx(internetSettings,
1522                                               'ProxyEnable')[0]
1523            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1524                                                     'ProxyOverride')[0])
1525            # ^^^^ Returned as Unicode but problems if not converted to ASCII
1526        except WindowsError:
1527            return 0
1528        if not proxyEnable or not proxyOverride:
1529            return 0
1530        # try to make a host list from name and IP address.
1531        rawHost, port = splitport(host)
1532        host = [rawHost]
1533        try:
1534            addr = socket.gethostbyname(rawHost)
1535            if addr != rawHost:
1536                host.append(addr)
1537        except socket.error:
1538            pass
1539        try:
1540            fqdn = socket.getfqdn(rawHost)
1541            if fqdn != rawHost:
1542                host.append(fqdn)
1543        except socket.error:
1544            pass
1545        # make a check value list from the registry entry: replace the
1546        # '<local>' string by the localhost entry and the corresponding
1547        # canonical entry.
1548        proxyOverride = proxyOverride.split(';')
1549        # now check if we match one of the registry values.
1550        for test in proxyOverride:
1551            if test == '<local>':
1552                if '.' not in rawHost:
1553                    return 1
1554            test = test.replace(".", r"\.")     # mask dots
1555            test = test.replace("*", r".*")     # change glob sequence
1556            test = test.replace("?", r".")      # change glob char
1557            for val in host:
1558                # print "%s <--> %s" %( test, val )
1559                if re.match(test, val, re.I):
1560                    return 1
1561        return 0
1562
1563    def proxy_bypass(host):
        """Return 1 if the host should bypass the proxy, else 0.

        Checks the environment's no_proxy setting when proxy environment
        variables are present, otherwise the registry's ProxyOverride list.

        """
1570        if getproxies_environment():
1571            return proxy_bypass_environment(host)
1572        else:
1573            return proxy_bypass_registry(host)
1574
1575else:
1576    # By default use environment variables
1577    getproxies = getproxies_environment
1578    proxy_bypass = proxy_bypass_environment
1579
1580# Test and time quote() and unquote()
1581def test1():
1582    s = ''
1583    for i in range(256): s = s + chr(i)
1584    s = s*4
1585    t0 = time.time()
1586    qs = quote(s)
1587    uqs = unquote(qs)
1588    t1 = time.time()
1589    if uqs != s:
1590        print 'Wrong!'
1591    print repr(s)
1592    print repr(qs)
1593    print repr(uqs)
1594    print round(t1 - t0, 3), 'sec'
1595
1596
1597def reporthook(blocknum, blocksize, totalsize):
1598    # Report during remote transfers
1599    print "Block number: %d, Block size: %d, Total size: %d" % (
1600        blocknum, blocksize, totalsize)
1601