urllib.py revision 87e585581da34e0da900f3704c0cf162bc144b7d
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738: the "URL standard". (authoritative status)
14- RFC1630: the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30from urlparse import urljoin as basejoin
31
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34           "urlencode", "url2pathname", "pathname2url", "splittag",
35           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37           "splitnport", "splitquery", "splitattr", "splitvalue",
38           "getproxies"]
39
40__version__ = '1.17'    # XXX This version is not always updated :-(
41
42MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43
44# Helper for non-unix systems
45if os.name == 'nt':
46    from nturl2path import url2pathname, pathname2url
47elif os.name == 'riscos':
48    from rourl2path import url2pathname, pathname2url
49else:
50    def url2pathname(pathname):
51        """OS-specific conversion from a relative URL of the 'file' scheme
52        to a file system path; not recommended for general use."""
53        return unquote(pathname)
54
55    def pathname2url(pathname):
56        """OS-specific conversion from a file system path to a relative URL
57        of the 'file' scheme; not recommended for general use."""
58        return quote(pathname)
59
60# This really consists of two pieces:
61# (1) a class which handles opening of all sorts of URLs
62#     (plus assorted utilities etc.)
63# (2) a set of functions for parsing URLs
64# XXX Should these be separated out into different modules?
65
66
67# Shortcut for basic usage
68_urlopener = None
69def urlopen(url, data=None, proxies=None):
70    """Create a file-like object for the specified URL to read from."""
71    from warnings import warnpy3k
72    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
73             "favor of urllib2.urlopen()", stacklevel=2)
74
75    global _urlopener
76    if proxies is not None:
77        opener = FancyURLopener(proxies=proxies)
78    elif not _urlopener:
79        opener = FancyURLopener()
80        _urlopener = opener
81    else:
82        opener = _urlopener
83    if data is None:
84        return opener.open(url)
85    else:
86        return opener.open(url, data)
87def urlretrieve(url, filename=None, reporthook=None, data=None):
88    global _urlopener
89    if not _urlopener:
90        _urlopener = FancyURLopener()
91    return _urlopener.retrieve(url, filename, reporthook, data)
92def urlcleanup():
93    if _urlopener:
94        _urlopener.cleanup()
95    _safe_quoters.clear()
96    ftpcache.clear()
97
98# check for SSL
99try:
100    import ssl
101except ImportError:
102    _have_ssl = False
103else:
104    _have_ssl = True
105
106# exception raised when downloaded size does not match content-length
107class ContentTooShortError(IOError):
108    def __init__(self, message, content):
109        IOError.__init__(self, message)
110        self.content = content
111
112ftpcache = {}
113class URLopener:
114    """Class to open URLs.
115    This is a class rather than just a subroutine because we may need
116    more than one set of global protocol-specific options.
117    Note -- this is a base class for those who don't want the
118    automatic handling of error types 302 (relocated) and 401
119    (authorization needed)."""
120
121    __tempfiles = None
122
123    version = "Python-urllib/%s" % __version__
124
125    # Constructor
126    def __init__(self, proxies=None, **x509):
127        if proxies is None:
128            proxies = getproxies()
129        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
130        self.proxies = proxies
131        self.key_file = x509.get('key_file')
132        self.cert_file = x509.get('cert_file')
133        self.addheaders = [('User-Agent', self.version)]
134        self.__tempfiles = []
135        self.__unlink = os.unlink # See cleanup()
136        self.tempcache = None
137        # Undocumented feature: if you assign {} to tempcache,
138        # it is used to cache files retrieved with
139        # self.retrieve().  This is not enabled by default
140        # since it does not work for changing documents (and I
141        # haven't got the logic to check expiration headers
142        # yet).
143        self.ftpcache = ftpcache
144        # Undocumented feature: you can use a different
145        # ftp cache by assigning to the .ftpcache member;
146        # in case you want logically independent URL openers
147        # XXX This is not threadsafe.  Bah.
148
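    # Illustrative sketch of the undocumented tempcache feature noted above;
    # the URL is a placeholder, and caching is only safe for documents that
    # do not change between retrievals:
    #
    #     opener = URLopener()
    #     opener.tempcache = {}
    #     filename, headers = opener.retrieve("http://www.example.com/")
    #     # a second retrieve() of the same URL now reuses the cached file
    #     filename, headers = opener.retrieve("http://www.example.com/")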
149    def __del__(self):
150        self.close()
151
152    def close(self):
153        self.cleanup()
154
155    def cleanup(self):
156        # This code sometimes runs when the rest of this module
157        # has already been deleted, so it can't use any globals
158        # or import anything.
159        if self.__tempfiles:
160            for file in self.__tempfiles:
161                try:
162                    self.__unlink(file)
163                except OSError:
164                    pass
165            del self.__tempfiles[:]
166        if self.tempcache:
167            self.tempcache.clear()
168
169    def addheader(self, *args):
170        """Add a header to be used by the HTTP interface only
171        e.g. u.addheader('Accept', 'sound/basic')"""
172        self.addheaders.append(args)
173
174    # External interface
175    def open(self, fullurl, data=None):
176        """Use URLopener().open(file) instead of open(file, 'r')."""
177        fullurl = unwrap(toBytes(fullurl))
178        # percent-encode the URL, working around lame servers that choke on
179        # characters such as spaces within URL paths.
180        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
181        if self.tempcache and fullurl in self.tempcache:
182            filename, headers = self.tempcache[fullurl]
183            fp = open(filename, 'rb')
184            return addinfourl(fp, headers, fullurl)
185        urltype, url = splittype(fullurl)
186        if not urltype:
187            urltype = 'file'
188        if urltype in self.proxies:
189            proxy = self.proxies[urltype]
190            urltype, proxyhost = splittype(proxy)
191            host, selector = splithost(proxyhost)
192            url = (host, fullurl) # Signal special case to open_*()
193        else:
194            proxy = None
195        name = 'open_' + urltype
196        self.type = urltype
197        name = name.replace('-', '_')
198        if not hasattr(self, name):
199            if proxy:
200                return self.open_unknown_proxy(proxy, fullurl, data)
201            else:
202                return self.open_unknown(fullurl, data)
203        try:
204            if data is None:
205                return getattr(self, name)(url)
206            else:
207                return getattr(self, name)(url, data)
208        except socket.error, msg:
209            raise IOError, ('socket error', msg), sys.exc_info()[2]
210
211    def open_unknown(self, fullurl, data=None):
212        """Overridable interface to open unknown URL type."""
213        type, url = splittype(fullurl)
214        raise IOError, ('url error', 'unknown url type', type)
215
216    def open_unknown_proxy(self, proxy, fullurl, data=None):
217        """Overridable interface to open unknown URL type."""
218        type, url = splittype(fullurl)
219        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
220
221    # External interface
222    def retrieve(self, url, filename=None, reporthook=None, data=None):
223        """retrieve(url) returns (filename, headers) for a local object
224        or (tempfilename, headers) for a remote object."""
225        url = unwrap(toBytes(url))
226        if self.tempcache and url in self.tempcache:
227            return self.tempcache[url]
228        type, url1 = splittype(url)
229        if filename is None and (not type or type == 'file'):
230            try:
231                fp = self.open_local_file(url1)
232                hdrs = fp.info()
233                fp.close()
234                return url2pathname(splithost(url1)[1]), hdrs
235            except IOError:
236                pass
237        fp = self.open(url, data)
238        try:
239            headers = fp.info()
240            if filename:
241                tfp = open(filename, 'wb')
242            else:
243                import tempfile
244                garbage, path = splittype(url)
245                garbage, path = splithost(path or "")
246                path, garbage = splitquery(path or "")
247                path, garbage = splitattr(path or "")
248                suffix = os.path.splitext(path)[1]
249                (fd, filename) = tempfile.mkstemp(suffix)
250                self.__tempfiles.append(filename)
251                tfp = os.fdopen(fd, 'wb')
252            try:
253                result = filename, headers
254                if self.tempcache is not None:
255                    self.tempcache[url] = result
256                bs = 1024*8
257                size = -1
258                read = 0
259                blocknum = 0
260                if "content-length" in headers:
261                    size = int(headers["Content-Length"])
262                if reporthook:
263                    reporthook(blocknum, bs, size)
264                while 1:
265                    block = fp.read(bs)
266                    if block == "":
267                        break
268                    read += len(block)
269                    tfp.write(block)
270                    blocknum += 1
271                    if reporthook:
272                        reporthook(blocknum, bs, size)
273            finally:
274                tfp.close()
275        finally:
276            fp.close()
277
278        # raise exception if actual size does not match content-length header
279        if size >= 0 and read < size:
280            raise ContentTooShortError("retrieval incomplete: got only %i out "
281                                       "of %i bytes" % (read, size), result)
282
283        return result
284
285    # Each method named open_<type> knows how to open that type of URL
286
287    def open_http(self, url, data=None):
288        """Use HTTP protocol."""
289        import httplib
290        user_passwd = None
291        proxy_passwd = None
292        if isinstance(url, str):
293            host, selector = splithost(url)
294            if host:
295                user_passwd, host = splituser(host)
296                host = unquote(host)
297            realhost = host
298        else:
299            host, selector = url
300            # check whether the proxy contains authorization information
301            proxy_passwd, host = splituser(host)
302            # now we proceed with the url we want to obtain
303            urltype, rest = splittype(selector)
304            url = rest
305            user_passwd = None
306            if urltype.lower() != 'http':
307                realhost = None
308            else:
309                realhost, rest = splithost(rest)
310                if realhost:
311                    user_passwd, realhost = splituser(realhost)
312                if user_passwd:
313                    selector = "%s://%s%s" % (urltype, realhost, rest)
314                if proxy_bypass(realhost):
315                    host = realhost
316
317            #print "proxy via http:", host, selector
318        if not host: raise IOError, ('http error', 'no host given')
319
320        if proxy_passwd:
321            import base64
322            proxy_auth = base64.b64encode(proxy_passwd).strip()
323        else:
324            proxy_auth = None
325
326        if user_passwd:
327            import base64
328            auth = base64.b64encode(user_passwd).strip()
329        else:
330            auth = None
331        h = httplib.HTTP(host)
332        if data is not None:
333            h.putrequest('POST', selector)
334            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
335            h.putheader('Content-Length', '%d' % len(data))
336        else:
337            h.putrequest('GET', selector)
338        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
339        if auth: h.putheader('Authorization', 'Basic %s' % auth)
340        if realhost: h.putheader('Host', realhost)
341        for args in self.addheaders: h.putheader(*args)
342        h.endheaders(data)
343        errcode, errmsg, headers = h.getreply()
344        fp = h.getfile()
345        if errcode == -1:
346            if fp: fp.close()
347            # something went wrong with the HTTP status line
348            raise IOError, ('http protocol error', 0,
349                            'got a bad status line', None)
350        # According to RFC 2616, "2xx" code indicates that the client's
351        # request was successfully received, understood, and accepted.
352        if (200 <= errcode < 300):
353            return addinfourl(fp, headers, "http:" + url, errcode)
354        else:
355            if data is None:
356                return self.http_error(url, fp, errcode, errmsg, headers)
357            else:
358                return self.http_error(url, fp, errcode, errmsg, headers, data)
359
360    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
361        """Handle http errors.
362        Derived class can override this, or provide specific handlers
363        named http_error_DDD where DDD is the 3-digit error code."""
364        # First check if there's a specific handler for this error
365        name = 'http_error_%d' % errcode
366        if hasattr(self, name):
367            method = getattr(self, name)
368            if data is None:
369                result = method(url, fp, errcode, errmsg, headers)
370            else:
371                result = method(url, fp, errcode, errmsg, headers, data)
372            if result: return result
373        return self.http_error_default(url, fp, errcode, errmsg, headers)
374
375    def http_error_default(self, url, fp, errcode, errmsg, headers):
376        """Default error handler: close the connection and raise IOError."""
377        void = fp.read()
378        fp.close()
379        raise IOError, ('http error', errcode, errmsg, headers)
380
381    if _have_ssl:
382        def open_https(self, url, data=None):
383            """Use HTTPS protocol."""
384
385            import httplib
386            user_passwd = None
387            proxy_passwd = None
388            if isinstance(url, str):
389                host, selector = splithost(url)
390                if host:
391                    user_passwd, host = splituser(host)
392                    host = unquote(host)
393                realhost = host
394            else:
395                host, selector = url
396            # check whether the proxy contains authorization information
397                proxy_passwd, host = splituser(host)
398                urltype, rest = splittype(selector)
399                url = rest
400                user_passwd = None
401                if urltype.lower() != 'https':
402                    realhost = None
403                else:
404                    realhost, rest = splithost(rest)
405                    if realhost:
406                        user_passwd, realhost = splituser(realhost)
407                    if user_passwd:
408                        selector = "%s://%s%s" % (urltype, realhost, rest)
409                #print "proxy via https:", host, selector
410            if not host: raise IOError, ('https error', 'no host given')
411            if proxy_passwd:
412                import base64
413                proxy_auth = base64.b64encode(proxy_passwd).strip()
414            else:
415                proxy_auth = None
416            if user_passwd:
417                import base64
418                auth = base64.b64encode(user_passwd).strip()
419            else:
420                auth = None
421            h = httplib.HTTPS(host, 0,
422                              key_file=self.key_file,
423                              cert_file=self.cert_file)
424            if data is not None:
425                h.putrequest('POST', selector)
426                h.putheader('Content-Type',
427                            'application/x-www-form-urlencoded')
428                h.putheader('Content-Length', '%d' % len(data))
429            else:
430                h.putrequest('GET', selector)
431            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
432            if auth: h.putheader('Authorization', 'Basic %s' % auth)
433            if realhost: h.putheader('Host', realhost)
434            for args in self.addheaders: h.putheader(*args)
435            h.endheaders(data)
436            errcode, errmsg, headers = h.getreply()
437            fp = h.getfile()
438            if errcode == -1:
439                if fp: fp.close()
440                # something went wrong with the HTTP status line
441                raise IOError, ('http protocol error', 0,
442                                'got a bad status line', None)
443            # According to RFC 2616, "2xx" code indicates that the client's
444            # request was successfully received, understood, and accepted.
445            if (200 <= errcode < 300):
446                return addinfourl(fp, headers, "https:" + url, errcode)
447            else:
448                if data is None:
449                    return self.http_error(url, fp, errcode, errmsg, headers)
450                else:
451                    return self.http_error(url, fp, errcode, errmsg, headers,
452                                           data)
453
454    def open_file(self, url):
455        """Use local file or FTP depending on form of URL."""
456        if not isinstance(url, str):
457            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
458        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
459            return self.open_ftp(url)
460        else:
461            return self.open_local_file(url)
462
463    def open_local_file(self, url):
464        """Use local file."""
465        import mimetypes, mimetools, email.utils
466        try:
467            from cStringIO import StringIO
468        except ImportError:
469            from StringIO import StringIO
470        host, file = splithost(url)
471        localname = url2pathname(file)
472        try:
473            stats = os.stat(localname)
474        except OSError, e:
475            raise IOError(e.errno, e.strerror, e.filename)
476        size = stats.st_size
477        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
478        mtype = mimetypes.guess_type(url)[0]
479        headers = mimetools.Message(StringIO(
480            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
481            (mtype or 'text/plain', size, modified)))
482        if not host:
483            urlfile = file
484            if file[:1] == '/':
485                urlfile = 'file://' + file
486            return addinfourl(open(localname, 'rb'),
487                              headers, urlfile)
488        host, port = splitport(host)
489        if not port \
490           and socket.gethostbyname(host) in (localhost(), thishost()):
491            urlfile = file
492            if file[:1] == '/':
493                urlfile = 'file://' + file
494            return addinfourl(open(localname, 'rb'),
495                              headers, urlfile)
496        raise IOError, ('local file error', 'not on local host')
497
498    def open_ftp(self, url):
499        """Use FTP protocol."""
500        if not isinstance(url, str):
501            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
502        import mimetypes, mimetools
503        try:
504            from cStringIO import StringIO
505        except ImportError:
506            from StringIO import StringIO
507        host, path = splithost(url)
508        if not host: raise IOError, ('ftp error', 'no host given')
509        host, port = splitport(host)
510        user, host = splituser(host)
511        if user: user, passwd = splitpasswd(user)
512        else: passwd = None
513        host = unquote(host)
514        user = user or ''
515        passwd = passwd or ''
516        host = socket.gethostbyname(host)
517        if not port:
518            import ftplib
519            port = ftplib.FTP_PORT
520        else:
521            port = int(port)
522        path, attrs = splitattr(path)
523        path = unquote(path)
524        dirs = path.split('/')
525        dirs, file = dirs[:-1], dirs[-1]
526        if dirs and not dirs[0]: dirs = dirs[1:]
527        if dirs and not dirs[0]: dirs[0] = '/'
528        key = user, host, port, '/'.join(dirs)
529        # XXX thread unsafe!
530        if len(self.ftpcache) > MAXFTPCACHE:
531            # Prune the cache, rather arbitrarily
532            for k in self.ftpcache.keys():
533                if k != key:
534                    v = self.ftpcache[k]
535                    del self.ftpcache[k]
536                    v.close()
537        try:
538            if key not in self.ftpcache:
539                self.ftpcache[key] = \
540                    ftpwrapper(user, passwd, host, port, dirs)
541            if not file: type = 'D'
542            else: type = 'I'
543            for attr in attrs:
544                attr, value = splitvalue(attr)
545                if attr.lower() == 'type' and \
546                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
547                    type = value.upper()
548            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
549            mtype = mimetypes.guess_type("ftp:" + url)[0]
550            headers = ""
551            if mtype:
552                headers += "Content-Type: %s\n" % mtype
553            if retrlen is not None and retrlen >= 0:
554                headers += "Content-Length: %d\n" % retrlen
555            headers = mimetools.Message(StringIO(headers))
556            return addinfourl(fp, headers, "ftp:" + url)
557        except ftperrors(), msg:
558            raise IOError, ('ftp error', msg), sys.exc_info()[2]
559
560    def open_data(self, url, data=None):
561        """Use "data" URL."""
562        if not isinstance(url, str):
563            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
564        # ignore POSTed data
565        #
566        # syntax of data URLs:
567        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
568        # mediatype := [ type "/" subtype ] *( ";" parameter )
569        # data      := *urlchar
570        # parameter := attribute "=" value
571        import mimetools
572        try:
573            from cStringIO import StringIO
574        except ImportError:
575            from StringIO import StringIO
576        try:
577            [type, data] = url.split(',', 1)
578        except ValueError:
579            raise IOError, ('data error', 'bad data URL')
580        if not type:
581            type = 'text/plain;charset=US-ASCII'
582        semi = type.rfind(';')
583        if semi >= 0 and '=' not in type[semi:]:
584            encoding = type[semi+1:]
585            type = type[:semi]
586        else:
587            encoding = ''
588        msg = []
589        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
590                                            time.gmtime(time.time())))
591        msg.append('Content-type: %s' % type)
592        if encoding == 'base64':
593            import base64
594            data = base64.decodestring(data)
595        else:
596            data = unquote(data)
597        msg.append('Content-Length: %d' % len(data))
598        msg.append('')
599        msg.append(data)
600        msg = '\n'.join(msg)
601        f = StringIO(msg)
602        headers = mimetools.Message(f, 0)
603        #f.fileno = None     # needed for addinfourl
604        return addinfourl(f, headers, url)
605
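# Illustrative example of the data URL syntax handled by open_data() above;
# the base64 payload decodes to "Hello, world!":
#
#     f = URLopener().open("data:text/plain;base64,SGVsbG8sIHdvcmxkIQ==")
#     f.info().gettype()   # -> 'text/plain'
#     f.read()             # -> 'Hello, world!'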
606
607class FancyURLopener(URLopener):
608    """Derived class with handlers for errors we can handle (perhaps)."""
609
610    def __init__(self, *args, **kwargs):
611        URLopener.__init__(self, *args, **kwargs)
612        self.auth_cache = {}
613        self.tries = 0
614        self.maxtries = 10
615
616    def http_error_default(self, url, fp, errcode, errmsg, headers):
617        """Default error handling -- don't raise an exception."""
618        return addinfourl(fp, headers, "http:" + url, errcode)
619
620    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
621        """Error 302 -- relocated (temporarily)."""
622        self.tries += 1
623        if self.maxtries and self.tries >= self.maxtries:
624            if hasattr(self, "http_error_500"):
625                meth = self.http_error_500
626            else:
627                meth = self.http_error_default
628            self.tries = 0
629            return meth(url, fp, 500,
630                        "Internal Server Error: Redirect Recursion", headers)
631        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
632                                        data)
633        self.tries = 0
634        return result
635
636    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
637        if 'location' in headers:
638            newurl = headers['location']
639        elif 'uri' in headers:
640            newurl = headers['uri']
641        else:
642            return
643        void = fp.read()
644        fp.close()
645        # In case the server sent a relative URL, join with original:
646        newurl = basejoin(self.type + ":" + url, newurl)
647
648        # For security reasons we do not allow redirects to protocols
649        # other than HTTP, HTTPS or FTP.
650        newurl_lower = newurl.lower()
651        if not (newurl_lower.startswith('http://') or
652                newurl_lower.startswith('https://') or
653                newurl_lower.startswith('ftp://')):
654            raise IOError('redirect error', errcode,
655                          errmsg + " - Redirection to url '%s' is not allowed" %
656                          newurl,
657                          headers)
658
659        return self.open(newurl)
660
661    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
662        """Error 301 -- also relocated (permanently)."""
663        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
664
665    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
666        """Error 303 -- also relocated (essentially identical to 302)."""
667        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
668
669    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
670        """Error 307 -- relocated, but turn POST into error."""
671        if data is None:
672            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
673        else:
674            return self.http_error_default(url, fp, errcode, errmsg, headers)
675
676    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
677        """Error 401 -- authentication required.
678        This function supports Basic authentication only."""
679        if 'www-authenticate' not in headers:
680            URLopener.http_error_default(self, url, fp,
681                                         errcode, errmsg, headers)
682        stuff = headers['www-authenticate']
683        import re
684        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
685        if not match:
686            URLopener.http_error_default(self, url, fp,
687                                         errcode, errmsg, headers)
688        scheme, realm = match.groups()
689        if scheme.lower() != 'basic':
690            URLopener.http_error_default(self, url, fp,
691                                         errcode, errmsg, headers)
692        name = 'retry_' + self.type + '_basic_auth'
693        if data is None:
694            return getattr(self,name)(url, realm)
695        else:
696            return getattr(self,name)(url, realm, data)
697
698    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
699        """Error 407 -- proxy authentication required.
700        This function supports Basic authentication only."""
701        if 'proxy-authenticate' not in headers:
702            URLopener.http_error_default(self, url, fp,
703                                         errcode, errmsg, headers)
704        stuff = headers['proxy-authenticate']
705        import re
706        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
707        if not match:
708            URLopener.http_error_default(self, url, fp,
709                                         errcode, errmsg, headers)
710        scheme, realm = match.groups()
711        if scheme.lower() != 'basic':
712            URLopener.http_error_default(self, url, fp,
713                                         errcode, errmsg, headers)
714        name = 'retry_proxy_' + self.type + '_basic_auth'
715        if data is None:
716            return getattr(self,name)(url, realm)
717        else:
718            return getattr(self,name)(url, realm, data)
719
720    def retry_proxy_http_basic_auth(self, url, realm, data=None):
721        host, selector = splithost(url)
722        newurl = 'http://' + host + selector
723        proxy = self.proxies['http']
724        urltype, proxyhost = splittype(proxy)
725        proxyhost, proxyselector = splithost(proxyhost)
726        i = proxyhost.find('@') + 1
727        proxyhost = proxyhost[i:]
728        user, passwd = self.get_user_passwd(proxyhost, realm, i)
729        if not (user or passwd): return None
730        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
731        self.proxies['http'] = 'http://' + proxyhost + proxyselector
732        if data is None:
733            return self.open(newurl)
734        else:
735            return self.open(newurl, data)
736
737    def retry_proxy_https_basic_auth(self, url, realm, data=None):
738        host, selector = splithost(url)
739        newurl = 'https://' + host + selector
740        proxy = self.proxies['https']
741        urltype, proxyhost = splittype(proxy)
742        proxyhost, proxyselector = splithost(proxyhost)
743        i = proxyhost.find('@') + 1
744        proxyhost = proxyhost[i:]
745        user, passwd = self.get_user_passwd(proxyhost, realm, i)
746        if not (user or passwd): return None
747        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
748        self.proxies['https'] = 'https://' + proxyhost + proxyselector
749        if data is None:
750            return self.open(newurl)
751        else:
752            return self.open(newurl, data)
753
754    def retry_http_basic_auth(self, url, realm, data=None):
755        host, selector = splithost(url)
756        i = host.find('@') + 1
757        host = host[i:]
758        user, passwd = self.get_user_passwd(host, realm, i)
759        if not (user or passwd): return None
760        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
761        newurl = 'http://' + host + selector
762        if data is None:
763            return self.open(newurl)
764        else:
765            return self.open(newurl, data)
766
767    def retry_https_basic_auth(self, url, realm, data=None):
768        host, selector = splithost(url)
769        i = host.find('@') + 1
770        host = host[i:]
771        user, passwd = self.get_user_passwd(host, realm, i)
772        if not (user or passwd): return None
773        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
774        newurl = 'https://' + host + selector
775        if data is None:
776            return self.open(newurl)
777        else:
778            return self.open(newurl, data)
779
780    def get_user_passwd(self, host, realm, clear_cache=0):
781        key = realm + '@' + host.lower()
782        if key in self.auth_cache:
783            if clear_cache:
784                del self.auth_cache[key]
785            else:
786                return self.auth_cache[key]
787        user, passwd = self.prompt_user_passwd(host, realm)
788        if user or passwd: self.auth_cache[key] = (user, passwd)
789        return user, passwd
790
791    def prompt_user_passwd(self, host, realm):
792        """Override this in a GUI environment!"""
793        import getpass
794        try:
795            user = raw_input("Enter username for %s at %s: " % (realm,
796                                                                host))
797            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
798                (user, realm, host))
799            return user, passwd
800        except KeyboardInterrupt:
801            print
802            return None, None
803
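# A sketch of supplying credentials non-interactively by overriding
# prompt_user_passwd() in a subclass; the class name, URL and credentials
# below are placeholders:
#
#     class AuthOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'myuser', 'mypassword'
#
#     f = AuthOpener().open("http://www.example.com/protected/")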
804
805# Utility functions
806
807_localhost = None
808def localhost():
809    """Return the IP address of the magic hostname 'localhost'."""
810    global _localhost
811    if _localhost is None:
812        _localhost = socket.gethostbyname('localhost')
813    return _localhost
814
815_thishost = None
816def thishost():
817    """Return the IP address of the current host."""
818    global _thishost
819    if _thishost is None:
820        _thishost = socket.gethostbyname(socket.gethostname())
821    return _thishost
822
823_ftperrors = None
824def ftperrors():
825    """Return the set of errors raised by the FTP class."""
826    global _ftperrors
827    if _ftperrors is None:
828        import ftplib
829        _ftperrors = ftplib.all_errors
830    return _ftperrors
831
832_noheaders = None
833def noheaders():
834    """Return an empty mimetools.Message object."""
835    global _noheaders
836    if _noheaders is None:
837        import mimetools
838        try:
839            from cStringIO import StringIO
840        except ImportError:
841            from StringIO import StringIO
842        _noheaders = mimetools.Message(StringIO(), 0)
843        _noheaders.fp.close()   # Recycle file descriptor
844    return _noheaders
845
846
847# Utility classes
848
849class ftpwrapper:
850    """Class used by open_ftp() for cache of open FTP connections."""
851
852    def __init__(self, user, passwd, host, port, dirs,
853                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
854                 persistent=True):
855        self.user = user
856        self.passwd = passwd
857        self.host = host
858        self.port = port
859        self.dirs = dirs
860        self.timeout = timeout
861        self.refcount = 0
862        self.keepalive = persistent
863        self.init()
864
865    def init(self):
866        import ftplib
867        self.busy = 0
868        self.ftp = ftplib.FTP()
869        self.ftp.connect(self.host, self.port, self.timeout)
870        self.ftp.login(self.user, self.passwd)
871        for dir in self.dirs:
872            self.ftp.cwd(dir)
873
874    def retrfile(self, file, type):
875        import ftplib
876        self.endtransfer()
877        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
878        else: cmd = 'TYPE ' + type; isdir = 0
879        try:
880            self.ftp.voidcmd(cmd)
881        except ftplib.all_errors:
882            self.init()
883            self.ftp.voidcmd(cmd)
884        conn = None
885        if file and not isdir:
886            # Try to retrieve as a file
887            try:
888                cmd = 'RETR ' + file
889                conn, retrlen = self.ftp.ntransfercmd(cmd)
890            except ftplib.error_perm, reason:
891                if str(reason)[:3] != '550':
892                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
893        if not conn:
894            # Set transfer mode to ASCII!
895            self.ftp.voidcmd('TYPE A')
896            # Try a directory listing. Verify that directory exists.
897            if file:
898                pwd = self.ftp.pwd()
899                try:
900                    try:
901                        self.ftp.cwd(file)
902                    except ftplib.error_perm, reason:
903                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
904                finally:
905                    self.ftp.cwd(pwd)
906                cmd = 'LIST ' + file
907            else:
908                cmd = 'LIST'
909            conn, retrlen = self.ftp.ntransfercmd(cmd)
910        self.busy = 1
911        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
912        self.refcount += 1
913        conn.close()
914        # Pass back both a suitably decorated object and a retrieval length
915        return (ftpobj, retrlen)
916
917    def endtransfer(self):
918        if not self.busy:
919            return
920        self.busy = 0
921        try:
922            self.ftp.voidresp()
923        except ftperrors():
924            pass
925
926    def close(self):
927        self.keepalive = False
928        if self.refcount <= 0:
929            self.real_close()
930
931    def file_close(self):
932        self.endtransfer()
933        self.refcount -= 1
934        if self.refcount <= 0 and not self.keepalive:
935            self.real_close()
936
937    def real_close(self):
938        self.endtransfer()
939        try:
940            self.ftp.close()
941        except ftperrors():
942            pass
943
944class addbase:
945    """Base class for addinfo and addclosehook."""
946
947    def __init__(self, fp):
948        self.fp = fp
949        self.read = self.fp.read
950        self.readline = self.fp.readline
951        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
952        if hasattr(self.fp, "fileno"):
953            self.fileno = self.fp.fileno
954        else:
955            self.fileno = lambda: None
956        if hasattr(self.fp, "__iter__"):
957            self.__iter__ = self.fp.__iter__
958            if hasattr(self.fp, "next"):
959                self.next = self.fp.next
960
961    def __repr__(self):
962        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
963                                             id(self), self.fp)
964
965    def close(self):
966        self.read = None
967        self.readline = None
968        self.readlines = None
969        self.fileno = None
970        if self.fp: self.fp.close()
971        self.fp = None
972
973class addclosehook(addbase):
974    """Class to add a close hook to an open file."""
975
976    def __init__(self, fp, closehook, *hookargs):
977        addbase.__init__(self, fp)
978        self.closehook = closehook
979        self.hookargs = hookargs
980
981    def close(self):
982        addbase.close(self)
983        if self.closehook:
984            self.closehook(*self.hookargs)
985            self.closehook = None
986            self.hookargs = None
987
988class addinfo(addbase):
989    """class to add an info() method to an open file."""
990
991    def __init__(self, fp, headers):
992        addbase.__init__(self, fp)
993        self.headers = headers
994
995    def info(self):
996        return self.headers
997
998class addinfourl(addbase):
999    """class to add info() and geturl() methods to an open file."""
1000
1001    def __init__(self, fp, headers, url, code=None):
1002        addbase.__init__(self, fp)
1003        self.headers = headers
1004        self.url = url
1005        self.code = code
1006
1007    def info(self):
1008        return self.headers
1009
1010    def getcode(self):
1011        return self.code
1012
1013    def geturl(self):
1014        return self.url
1015
1016
1017# Utilities to parse URLs (most of these return None for missing parts):
1018# unwrap('<URL:type://host/path>') --> 'type://host/path'
1019# splittype('type:opaquestring') --> 'type', 'opaquestring'
1020# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1021# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1022# splitpasswd('user:passwd') -> 'user', 'passwd'
1023# splitport('host:port') --> 'host', 'port'
1024# splitquery('/path?query') --> '/path', 'query'
1025# splittag('/path#tag') --> '/path', 'tag'
1026# splitattr('/path;attr1=value1;attr2=value2;...') ->
1027#   '/path', ['attr1=value1', 'attr2=value2', ...]
1028# splitvalue('attr=value') --> 'attr', 'value'
1029# unquote('abc%20def') -> 'abc def'
1030# quote('abc def') -> 'abc%20def'
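# For illustration, chaining these helpers to decompose a made-up URL:
#
#     url = 'http://user:pw@www.example.com:8080/path?q=1'
#     scheme, rest = splittype(url)            # 'http', '//user:pw@www.example.com:8080/path?q=1'
#     netloc, path = splithost(rest)           # 'user:pw@www.example.com:8080', '/path?q=1'
#     userinfo, hostport = splituser(netloc)   # 'user:pw', 'www.example.com:8080'
#     user, passwd = splitpasswd(userinfo)     # 'user', 'pw'
#     host, port = splitport(hostport)         # 'www.example.com', '8080'
#     path, query = splitquery(path)           # '/path', 'q=1'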
1031
1032try:
1033    unicode
1034except NameError:
1035    def _is_unicode(x):
1036        return 0
1037else:
1038    def _is_unicode(x):
1039        return isinstance(x, unicode)
1040
1041def toBytes(url):
1042    """toBytes(u"URL") --> 'URL'."""
1043    # Most URL schemes require ASCII. If that changes, the conversion
1044    # can be relaxed
1045    if _is_unicode(url):
1046        try:
1047            url = url.encode("ASCII")
1048        except UnicodeError:
1049            raise UnicodeError("URL " + repr(url) +
1050                               " contains non-ASCII characters")
1051    return url
1052
1053def unwrap(url):
1054    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1055    url = url.strip()
1056    if url[:1] == '<' and url[-1:] == '>':
1057        url = url[1:-1].strip()
1058    if url[:4] == 'URL:': url = url[4:].strip()
1059    return url
1060
1061_typeprog = None
1062def splittype(url):
1063    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1064    global _typeprog
1065    if _typeprog is None:
1066        import re
1067        _typeprog = re.compile('^([^/:]+):')
1068
1069    match = _typeprog.match(url)
1070    if match:
1071        scheme = match.group(1)
1072        return scheme.lower(), url[len(scheme) + 1:]
1073    return None, url
1074
1075_hostprog = None
1076def splithost(url):
1077    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1078    global _hostprog
1079    if _hostprog is None:
1080        import re
1081        _hostprog = re.compile('^//([^/?]*)(.*)$')
1082
1083    match = _hostprog.match(url)
1084    if match:
1085        host_port = match.group(1)
1086        path = match.group(2)
1087        if path and not path.startswith('/'):
1088            path = '/' + path
1089        return host_port, path
1090    return None, url
1091
1092_userprog = None
1093def splituser(host):
1094    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1095    global _userprog
1096    if _userprog is None:
1097        import re
1098        _userprog = re.compile('^(.*)@(.*)$')
1099
1100    match = _userprog.match(host)
1101    if match: return match.group(1, 2)
1102    return None, host
1103
1104_passwdprog = None
1105def splitpasswd(user):
1106    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1107    global _passwdprog
1108    if _passwdprog is None:
1109        import re
1110        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1111
1112    match = _passwdprog.match(user)
1113    if match: return match.group(1, 2)
1114    return user, None
1115
1116# splittag('/path#tag') --> '/path', 'tag'
1117_portprog = None
1118def splitport(host):
1119    """splitport('host:port') --> 'host', 'port'."""
1120    global _portprog
1121    if _portprog is None:
1122        import re
1123        _portprog = re.compile('^(.*):([0-9]+)$')
1124
1125    match = _portprog.match(host)
1126    if match: return match.group(1, 2)
1127    return host, None
1128
1129_nportprog = None
1130def splitnport(host, defport=-1):
1131    """Split host and port, returning numeric port.
1132    Return the given default port if no ':' is found; it defaults to -1.
1133    Return the numerical port if a valid number is found after ':'.
1134    Return None if ':' is present but not followed by a valid number."""
1135    global _nportprog
1136    if _nportprog is None:
1137        import re
1138        _nportprog = re.compile('^(.*):(.*)$')
1139
1140    match = _nportprog.match(host)
1141    if match:
1142        host, port = match.group(1, 2)
1143        try:
1144            if not port: raise ValueError, "no digits"
1145            nport = int(port)
1146        except ValueError:
1147            nport = None
1148        return host, nport
1149    return host, defport
1150
1151_queryprog = None
1152def splitquery(url):
1153    """splitquery('/path?query') --> '/path', 'query'."""
1154    global _queryprog
1155    if _queryprog is None:
1156        import re
1157        _queryprog = re.compile('^(.*)\?([^?]*)$')
1158
1159    match = _queryprog.match(url)
1160    if match: return match.group(1, 2)
1161    return url, None
1162
1163_tagprog = None
1164def splittag(url):
1165    """splittag('/path#tag') --> '/path', 'tag'."""
1166    global _tagprog
1167    if _tagprog is None:
1168        import re
1169        _tagprog = re.compile('^(.*)#([^#]*)$')
1170
1171    match = _tagprog.match(url)
1172    if match: return match.group(1, 2)
1173    return url, None
1174
1175def splitattr(url):
1176    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1177        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1178    words = url.split(';')
1179    return words[0], words[1:]
1180
1181_valueprog = None
1182def splitvalue(attr):
1183    """splitvalue('attr=value') --> 'attr', 'value'."""
1184    global _valueprog
1185    if _valueprog is None:
1186        import re
1187        _valueprog = re.compile('^([^=]*)=(.*)$')
1188
1189    match = _valueprog.match(attr)
1190    if match: return match.group(1, 2)
1191    return attr, None
1192
1193# urlparse contains a duplicate of this method to avoid a circular import.  If
1194# you update this method, also update the copy in urlparse.  This code
1195# duplication does not exist in Python3.
1196
1197_hexdig = '0123456789ABCDEFabcdef'
1198_hextochr = dict((a + b, chr(int(a + b, 16)))
1199                 for a in _hexdig for b in _hexdig)
1200
1201def unquote(s):
1202    """unquote('abc%20def') -> 'abc def'."""
1203    res = s.split('%')
1204    # fastpath
1205    if len(res) == 1:
1206        return s
1207    s = res[0]
1208    for item in res[1:]:
1209        try:
1210            s += _hextochr[item[:2]] + item[2:]
1211        except KeyError:
1212            s += '%' + item
1213        except UnicodeDecodeError:
1214            s += unichr(int(item[:2], 16)) + item[2:]
1215    return s
1216
1217def unquote_plus(s):
1218    """unquote('%7e/abc+def') -> '~/abc def'"""
1219    s = s.replace('+', ' ')
1220    return unquote(s)
1221
1222always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1223               'abcdefghijklmnopqrstuvwxyz'
1224               '0123456789' '_.-')
1225_safe_map = {}
1226for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1227    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1228_safe_quoters = {}
1229
1230def quote(s, safe='/'):
1231    """quote('abc def') -> 'abc%20def'
1232
1233    Each part of a URL, e.g. the path info, the query, etc., has a
1234    different set of reserved characters that must be quoted.
1235
1236    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1237    the following reserved characters.
1238
1239    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1240                  "$" | ","
1241
1242    Each of these characters is reserved in some component of a URL,
1243    but not necessarily in all of them.
1244
1245    By default, the quote function is intended for quoting the path
1246    section of a URL.  Thus, it will not encode '/'.  This character
1247    is reserved, but in typical usage the quote function is being
1248    called on a path where the existing slash characters are used as
1249    reserved characters.
1250    """
1251    # fastpath
1252    if not s:
1253        if s is None:
1254            raise TypeError('None object cannot be quoted')
1255        return s
1256    cachekey = (safe, always_safe)
1257    try:
1258        (quoter, safe) = _safe_quoters[cachekey]
1259    except KeyError:
1260        safe_map = _safe_map.copy()
1261        safe_map.update([(c, c) for c in safe])
1262        quoter = safe_map.__getitem__
1263        safe = always_safe + safe
1264        _safe_quoters[cachekey] = (quoter, safe)
1265    if not s.rstrip(safe):
1266        return s
1267    return ''.join(map(quoter, s))
1268
1269def quote_plus(s, safe=''):
1270    """Quote the query fragment of a URL; replacing ' ' with '+'"""
1271    if ' ' in s:
1272        s = quote(s, safe + ' ')
1273        return s.replace(' ', '+')
1274    return quote(s, safe)
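# A few illustrative quoting round-trips (quote() defaults to safe='/'):
#
#     quote('abc def/ghi')           # -> 'abc%20def/ghi'
#     quote('abc def/ghi', safe='')  # -> 'abc%20def%2Fghi'
#     quote_plus('abc def&x')        # -> 'abc+def%26x'
#     unquote_plus('abc+def%26x')    # -> 'abc def&x'
#     unquote('abc%20def')           # -> 'abc def'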
1275
1276def urlencode(query, doseq=0):
1277    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1278
1279    If any values in the query arg are sequences and doseq is true, each
1280    sequence element is converted to a separate parameter.
1281
1282    If the query arg is a sequence of two-element tuples, the order of the
1283    parameters in the output will match the order of parameters in the
1284    input.
1285    """
1286
1287    if hasattr(query,"items"):
1288        # mapping objects
1289        query = query.items()
1290    else:
1291        # it's a bother at times that strings and string-like objects are
1292        # sequences...
1293        try:
1294            # non-sequence items should not work with len()
1295            # non-empty strings will fail this
1296            if len(query) and not isinstance(query[0], tuple):
1297                raise TypeError
1298            # zero-length sequences of all types will get here and succeed,
1299            # but that's a minor nit - since the original implementation
1300            # allowed empty dicts that type of behavior probably should be
1301            # preserved for consistency
1302        except TypeError:
1303            ty,va,tb = sys.exc_info()
1304            raise TypeError, "not a valid non-string sequence or mapping object", tb
1305
1306    l = []
1307    if not doseq:
1308        # preserve old behavior
1309        for k, v in query:
1310            k = quote_plus(str(k))
1311            v = quote_plus(str(v))
1312            l.append(k + '=' + v)
1313    else:
1314        for k, v in query:
1315            k = quote_plus(str(k))
1316            if isinstance(v, str):
1317                v = quote_plus(v)
1318                l.append(k + '=' + v)
1319            elif _is_unicode(v):
1320                # is there a reasonable way to convert to ASCII?
1321                # encode generates a string, but "replace" or "ignore"
1322                # lose information and "strict" can raise UnicodeError
1323                v = quote_plus(v.encode("ASCII","replace"))
1324                l.append(k + '=' + v)
1325            else:
1326                try:
1327                    # is this a sufficient test for sequence-ness?
1328                    len(v)
1329                except TypeError:
1330                    # not a sequence
1331                    v = quote_plus(str(v))
1332                    l.append(k + '=' + v)
1333                else:
1334                    # loop over the sequence
1335                    for elt in v:
1336                        l.append(k + '=' + quote_plus(str(elt)))
1337    return '&'.join(l)
1338
1339# Proxy handling
1340def getproxies_environment():
1341    """Return a dictionary of scheme -> proxy server URL mappings.
1342
1343    Scan the environment for variables named <scheme>_proxy;
1344    this seems to be the standard convention.  If you need a
1345    different way, you can pass a proxies dictionary to the
1346    [Fancy]URLopener constructor.
1347
1348    """
1349    proxies = {}
1350    for name, value in os.environ.items():
1351        name = name.lower()
1352        if value and name[-6:] == '_proxy':
1353            proxies[name[:-6]] = value
1354    return proxies
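# For illustration, with a hypothetical proxy configured in the environment as
#
#     http_proxy=http://proxy.example.com:3128/
#     ftp_proxy=http://proxy.example.com:3128/
#
# getproxies_environment() would return
#
#     {'http': 'http://proxy.example.com:3128/',
#      'ftp': 'http://proxy.example.com:3128/'}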
1355
1356def proxy_bypass_environment(host):
1357    """Test if proxies should not be used for a particular host.
1358
1359    Checks the environment for a variable named no_proxy, which should
1360    be a list of DNS suffixes separated by commas, or '*' for all hosts.
1361    """
1362    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1363    # '*' is a special case meaning "always bypass"
1364    if no_proxy == '*':
1365        return 1
1366    # strip port off host
1367    hostonly, port = splitport(host)
1368    # check if the host ends with any of the DNS suffixes
1369    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
1370    for name in no_proxy_list:
1371        if name and (hostonly.endswith(name) or host.endswith(name)):
1372            return 1
1373    # otherwise, don't bypass
1374    return 0
1375
1376
1377if sys.platform == 'darwin':
1378    from _scproxy import _get_proxy_settings, _get_proxies
1379
1380    def proxy_bypass_macosx_sysconf(host):
1381        """
1382        Return True iff this host shouldn't be accessed using a proxy
1383
1384        This function uses the MacOSX framework SystemConfiguration
1385        to fetch the proxy information.
1386        """
1387        import re
1388        import socket
1389        from fnmatch import fnmatch
1390
1391        hostonly, port = splitport(host)
1392
1393        def ip2num(ipAddr):
1394            parts = ipAddr.split('.')
1395            parts = map(int, parts)
1396            if len(parts) != 4:
1397                parts = (parts + [0, 0, 0, 0])[:4]
1398            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1399
1400        proxy_settings = _get_proxy_settings()
1401
1402        # Check for simple host names:
1403        if '.' not in host:
1404            if proxy_settings['exclude_simple']:
1405                return True
1406
1407        hostIP = None
1408
1409        for value in proxy_settings.get('exceptions', ()):
1410            # Items in the list are strings like these: *.local, 169.254/16
1411            if not value: continue
1412
1413            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1414            if m is not None:
1415                if hostIP is None:
1416                    try:
1417                        hostIP = socket.gethostbyname(hostonly)
1418                        hostIP = ip2num(hostIP)
1419                    except socket.error:
1420                        continue
1421
1422                base = ip2num(m.group(1))
1423                mask = m.group(2)
1424                if mask is None:
1425                    mask = 8 * (m.group(1).count('.') + 1)
1426
1427                else:
1428                    mask = int(mask[1:])
1429                mask = 32 - mask
1430
1431                if (hostIP >> mask) == (base >> mask):
1432                    return True
1433
1434            elif fnmatch(host, value):
1435                return True
1436
1437        return False
1438
1439    def getproxies_macosx_sysconf():
1440        """Return a dictionary of scheme -> proxy server URL mappings.
1441
1442        This function uses the MacOSX framework SystemConfiguration
1443        to fetch the proxy information.
1444        """
1445        return _get_proxies()
1446
1447    def proxy_bypass(host):
1448        if getproxies_environment():
1449            return proxy_bypass_environment(host)
1450        else:
1451            return proxy_bypass_macosx_sysconf(host)
1452
1453    def getproxies():
1454        return getproxies_environment() or getproxies_macosx_sysconf()
1455
1456elif os.name == 'nt':
1457    def getproxies_registry():
1458        """Return a dictionary of scheme -> proxy server URL mappings.
1459
1460        Win32 uses the registry to store proxies.
1461
1462        """
1463        proxies = {}
1464        try:
1465            import _winreg
1466        except ImportError:
1467            # Std module, so should be around - but you never know!
1468            return proxies
1469        try:
1470            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1471                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1472            proxyEnable = _winreg.QueryValueEx(internetSettings,
1473                                               'ProxyEnable')[0]
1474            if proxyEnable:
1475                # Returned as Unicode; causes problems if not converted to ASCII
1476                proxyServer = str(_winreg.QueryValueEx(internetSettings,
1477                                                       'ProxyServer')[0])
1478                if '=' in proxyServer:
1479                    # Per-protocol settings
1480                    for p in proxyServer.split(';'):
1481                        protocol, address = p.split('=', 1)
1482                        # See if address has a type:// prefix
1483                        import re
1484                        if not re.match('^([^/:]+)://', address):
1485                            address = '%s://%s' % (protocol, address)
1486                        proxies[protocol] = address
1487                else:
1488                    # Use one setting for all protocols
1489                    if proxyServer[:5] == 'http:':
1490                        proxies['http'] = proxyServer
1491                    else:
1492                        proxies['http'] = 'http://%s' % proxyServer
1493                        proxies['https'] = 'https://%s' % proxyServer
1494                        proxies['ftp'] = 'ftp://%s' % proxyServer
1495            internetSettings.Close()
1496        except (WindowsError, ValueError, TypeError):
1497            # Either the registry key was not found, or the value was in an
1498            # unexpected format.
1499            # proxies already set up to be empty so nothing to do
1500            pass
1501        return proxies
1502
1503    def getproxies():
1504        """Return a dictionary of scheme -> proxy server URL mappings.
1505
1506        Returns settings gathered from the environment, if specified,
1507        or the registry.
1508
1509        """
1510        return getproxies_environment() or getproxies_registry()
1511
1512    def proxy_bypass_registry(host):
1513        try:
1514            import _winreg
1515            import re
1516        except ImportError:
1517            # Std modules, so should be around - but you never know!
1518            return 0
1519        try:
1520            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1521                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1522            proxyEnable = _winreg.QueryValueEx(internetSettings,
1523                                               'ProxyEnable')[0]
1524            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1525                                                     'ProxyOverride')[0])
1526            # ^^^^ Returned as Unicode; causes problems if not converted to ASCII
1527        except WindowsError:
1528            return 0
1529        if not proxyEnable or not proxyOverride:
1530            return 0
1531        # try to make a host list from name and IP address.
1532        rawHost, port = splitport(host)
1533        host = [rawHost]
1534        try:
1535            addr = socket.gethostbyname(rawHost)
1536            if addr != rawHost:
1537                host.append(addr)
1538        except socket.error:
1539            pass
1540        try:
1541            fqdn = socket.getfqdn(rawHost)
1542            if fqdn != rawHost:
1543                host.append(fqdn)
1544        except socket.error:
1545            pass
1546        # make a check value list from the registry entry: replace the
1547        # '<local>' string by the localhost entry and the corresponding
1548        # canonical entry.
1549        proxyOverride = proxyOverride.split(';')
1550        # now check if we match one of the registry values.
1551        for test in proxyOverride:
1552            if test == '<local>':
1553                if '.' not in rawHost:
1554                    return 1
1555            test = test.replace(".", r"\.")     # mask dots
1556            test = test.replace("*", r".*")     # change glob sequence
1557            test = test.replace("?", r".")      # change glob char
1558            for val in host:
1559                # print "%s <--> %s" %( test, val )
1560                if re.match(test, val, re.I):
1561                    return 1
1562        return 0
1563
1564    def proxy_bypass(host):
1565        """Return a dictionary of scheme -> proxy server URL mappings.
1566
1567        Returns settings gathered from the environment, if specified,
1568        or the registry.
1569
1570        """
1571        if getproxies_environment():
1572            return proxy_bypass_environment(host)
1573        else:
1574            return proxy_bypass_registry(host)
1575
1576else:
1577    # By default use environment variables
1578    getproxies = getproxies_environment
1579    proxy_bypass = proxy_bypass_environment
1580
1581# Test and time quote() and unquote()
1582def test1():
1583    s = ''
1584    for i in range(256): s = s + chr(i)
1585    s = s*4
1586    t0 = time.time()
1587    qs = quote(s)
1588    uqs = unquote(qs)
1589    t1 = time.time()
1590    if uqs != s:
1591        print 'Wrong!'
1592    print repr(s)
1593    print repr(qs)
1594    print repr(uqs)
1595    print round(t1 - t0, 3), 'sec'
1596
1597
1598def reporthook(blocknum, blocksize, totalsize):
1599    # Report during remote transfers
1600    print "Block number: %d, Block size: %d, Total size: %d" % (
1601        blocknum, blocksize, totalsize)
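
# Illustrative use of reporthook() above with urlretrieve(); the URL is a
# placeholder, and the hook is called once before the first block is read
# and once after each subsequent block:
#
#     filename, headers = urlretrieve("http://www.example.com/",
#                                     reporthook=reporthook)
#     print "Saved to", filename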
1602