urllib.py revision d5e6cf2b152061cdae0164cef2382086c7638bbc
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738: the "URL standard". (authoritative status)
14- RFC1630: the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
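
# Editorial sketch (not part of the original module): minimal use of the
# interface described in the docstring above.  The URL is a placeholder and
# network access is assumed.
#
#     import urllib
#     f = urllib.urlopen('http://www.python.org/')   # file-like object
#     headers = f.info()                             # mimetools.Message
#     ctype = headers.getheader('Content-Type')      # e.g. 'text/html'
#     data = f.read()
#     f.close()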
24
25import string
26import socket
27import os
28import time
29import sys
30from urlparse import urljoin as basejoin
31
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34           "urlencode", "url2pathname", "pathname2url", "splittag",
35           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37           "splitnport", "splitquery", "splitattr", "splitvalue",
38           "getproxies"]
39
40__version__ = '1.17'    # XXX This version is not always updated :-(
41
42MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43
44# Helper for non-unix systems
45if os.name == 'mac':
46    from macurl2path import url2pathname, pathname2url
47elif os.name == 'nt':
48    from nturl2path import url2pathname, pathname2url
49elif os.name == 'riscos':
50    from rourl2path import url2pathname, pathname2url
51else:
52    def url2pathname(pathname):
53        """OS-specific conversion from a relative URL of the 'file' scheme
54        to a file system path; not recommended for general use."""
55        return unquote(pathname)
56
57    def pathname2url(pathname):
58        """OS-specific conversion from a file system path to a relative URL
59        of the 'file' scheme; not recommended for general use."""
60        return quote(pathname)
61
62# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64#     (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
70_urlopener = None
71def urlopen(url, data=None, proxies=None):
72    """urlopen(url [, data [, proxies]]) -> open file-like object"""
73    global _urlopener
74    if proxies is not None:
75        opener = FancyURLopener(proxies=proxies)
76    elif not _urlopener:
77        opener = FancyURLopener()
78        _urlopener = opener
79    else:
80        opener = _urlopener
81    if data is None:
82        return opener.open(url)
83    else:
84        return opener.open(url, data)
85def urlretrieve(url, filename=None, reporthook=None, data=None):
86    global _urlopener
87    if not _urlopener:
88        _urlopener = FancyURLopener()
89    return _urlopener.retrieve(url, filename, reporthook, data)
90def urlcleanup():
91    if _urlopener:
92        _urlopener.cleanup()
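
# Editorial sketch (illustrative only): the three module-level shortcuts
# above in use.  The URL, the filename and the _progress hook are made-up
# placeholders; network access is assumed.
#
#     def _progress(blocknum, blocksize, totalsize):
#         # totalsize is -1 when the server sends no Content-Length
#         print blocknum * blocksize, 'of', totalsize, 'bytes'
#
#     filename, headers = urlretrieve('http://www.python.org/',
#                                     'index.html', _progress)
#     urlcleanup()    # discard temporary files and cached entries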
93
94# check for SSL
95try:
96    import ssl
97except ImportError:
98    _have_ssl = False
99else:
100    _have_ssl = True
101
102# exception raised when downloaded size does not match content-length
103class ContentTooShortError(IOError):
104    def __init__(self, message, content):
105        IOError.__init__(self, message)
106        self.content = content
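
# Editorial sketch (illustrative only): urlretrieve() raises the exception
# above when the downloaded body is shorter than the advertised
# Content-Length.  The URL is a placeholder.
#
#     try:
#         filename, headers = urlretrieve('http://example.invalid/big.bin')
#     except ContentTooShortError, e:
#         partial = e.content    # the (filename, headers) pair for the partial file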
107
108ftpcache = {}
109class URLopener:
110    """Class to open URLs.
111    This is a class rather than just a subroutine because we may need
112    more than one set of global protocol-specific options.
113    Note -- this is a base class for those who don't want the
114    automatic handling of error types 302 (relocated) and 401
115    (authorization needed)."""
116
117    __tempfiles = None
118
119    version = "Python-urllib/%s" % __version__
120
121    # Constructor
122    def __init__(self, proxies=None, **x509):
123        if proxies is None:
124            proxies = getproxies()
125        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
126        self.proxies = proxies
127        self.key_file = x509.get('key_file')
128        self.cert_file = x509.get('cert_file')
129        self.addheaders = [('User-Agent', self.version)]
130        self.__tempfiles = []
131        self.__unlink = os.unlink # See cleanup()
132        self.tempcache = None
133        # Undocumented feature: if you assign {} to tempcache,
134        # it is used to cache files retrieved with
135        # self.retrieve().  This is not enabled by default
136        # since it does not work for changing documents (and I
137        # haven't got the logic to check expiration headers
138        # yet).
139        self.ftpcache = ftpcache
140        # Undocumented feature: you can use a different
141        # ftp cache by assigning to the .ftpcache member;
142        # in case you want logically independent URL openers
143        # XXX This is not threadsafe.  Bah.
144
145    def __del__(self):
146        self.close()
147
148    def close(self):
149        self.cleanup()
150
151    def cleanup(self):
152        # This code sometimes runs when the rest of this module
153        # has already been deleted, so it can't use any globals
154        # or import anything.
155        if self.__tempfiles:
156            for file in self.__tempfiles:
157                try:
158                    self.__unlink(file)
159                except OSError:
160                    pass
161            del self.__tempfiles[:]
162        if self.tempcache:
163            self.tempcache.clear()
164
165    def addheader(self, *args):
166        """Add a header to be used by the HTTP interface only,
167        e.g. u.addheader('Accept', 'sound/basic')."""
168        self.addheaders.append(args)
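
    # Editorial sketch (illustrative only): sending an extra header with
    # every HTTP request made by this opener.  The header value and URL are
    # placeholders.
    #
    #     opener = URLopener()
    #     opener.addheader('Accept', 'text/html')
    #     f = opener.open('http://www.python.org/')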
169
170    # External interface
171    def open(self, fullurl, data=None):
172        """Use URLopener().open(file) instead of open(file, 'r')."""
173        fullurl = unwrap(toBytes(fullurl))
174        if self.tempcache and fullurl in self.tempcache:
175            filename, headers = self.tempcache[fullurl]
176            fp = open(filename, 'rb')
177            return addinfourl(fp, headers, fullurl)
178        urltype, url = splittype(fullurl)
179        if not urltype:
180            urltype = 'file'
181        if urltype in self.proxies:
182            proxy = self.proxies[urltype]
183            urltype, proxyhost = splittype(proxy)
184            host, selector = splithost(proxyhost)
185            url = (host, fullurl) # Signal special case to open_*()
186        else:
187            proxy = None
188        name = 'open_' + urltype
189        self.type = urltype
190        name = name.replace('-', '_')
191        if not hasattr(self, name):
192            if proxy:
193                return self.open_unknown_proxy(proxy, fullurl, data)
194            else:
195                return self.open_unknown(fullurl, data)
196        try:
197            if data is None:
198                return getattr(self, name)(url)
199            else:
200                return getattr(self, name)(url, data)
201        except socket.error, msg:
202            raise IOError, ('socket error', msg), sys.exc_info()[2]
203
204    def open_unknown(self, fullurl, data=None):
205        """Overridable interface to open unknown URL type."""
206        type, url = splittype(fullurl)
207        raise IOError, ('url error', 'unknown url type', type)
208
209    def open_unknown_proxy(self, proxy, fullurl, data=None):
210        """Overridable interface to open unknown URL type."""
211        type, url = splittype(fullurl)
212        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
213
214    # External interface
215    def retrieve(self, url, filename=None, reporthook=None, data=None):
216        """retrieve(url) returns (filename, headers) for a local object
217        or (tempfilename, headers) for a remote object."""
218        url = unwrap(toBytes(url))
219        if self.tempcache and url in self.tempcache:
220            return self.tempcache[url]
221        type, url1 = splittype(url)
222        if filename is None and (not type or type == 'file'):
223            try:
224                fp = self.open_local_file(url1)
225                hdrs = fp.info()
226                del fp
227                return url2pathname(splithost(url1)[1]), hdrs
228            except IOError, msg:
229                pass
230        fp = self.open(url, data)
231        headers = fp.info()
232        if filename:
233            tfp = open(filename, 'wb')
234        else:
235            import tempfile
236            garbage, path = splittype(url)
237            garbage, path = splithost(path or "")
238            path, garbage = splitquery(path or "")
239            path, garbage = splitattr(path or "")
240            suffix = os.path.splitext(path)[1]
241            (fd, filename) = tempfile.mkstemp(suffix)
242            self.__tempfiles.append(filename)
243            tfp = os.fdopen(fd, 'wb')
244        result = filename, headers
245        if self.tempcache is not None:
246            self.tempcache[url] = result
247        bs = 1024*8
248        size = -1
249        read = 0
250        blocknum = 0
251        if reporthook:
252            if "content-length" in headers:
253                size = int(headers["content-length"])
254            reporthook(blocknum, bs, size)
255        while 1:
256            block = fp.read(bs)
257            if block == "":
258                break
259            read += len(block)
260            tfp.write(block)
261            blocknum += 1
262            if reporthook:
263                reporthook(blocknum, bs, size)
264        fp.close()
265        tfp.close()
266        del fp
267        del tfp
268
269        # raise exception if actual size does not match content-length header
270        if size >= 0 and read < size:
271            raise ContentTooShortError("retrieval incomplete: got only %i out "
272                                       "of %i bytes" % (read, size), result)
273
274        return result
275
276    # Each method named open_<type> knows how to open that type of URL
277
278    def open_http(self, url, data=None):
279        """Use HTTP protocol."""
280        import httplib
281        user_passwd = None
282        proxy_passwd = None
283        if isinstance(url, str):
284            host, selector = splithost(url)
285            if host:
286                user_passwd, host = splituser(host)
287                host = unquote(host)
288            realhost = host
289        else:
290            host, selector = url
291            # check whether the proxy contains authorization information
292            proxy_passwd, host = splituser(host)
293            # now we proceed with the url we want to obtain
294            urltype, rest = splittype(selector)
295            url = rest
296            user_passwd = None
297            if urltype.lower() != 'http':
298                realhost = None
299            else:
300                realhost, rest = splithost(rest)
301                if realhost:
302                    user_passwd, realhost = splituser(realhost)
303                if user_passwd:
304                    selector = "%s://%s%s" % (urltype, realhost, rest)
305                if proxy_bypass(realhost):
306                    host = realhost
307
308            #print "proxy via http:", host, selector
309        if not host: raise IOError, ('http error', 'no host given')
310
311        if proxy_passwd:
312            import base64
313            proxy_auth = base64.b64encode(proxy_passwd).strip()
314        else:
315            proxy_auth = None
316
317        if user_passwd:
318            import base64
319            auth = base64.b64encode(user_passwd).strip()
320        else:
321            auth = None
322        h = httplib.HTTP(host)
323        if data is not None:
324            h.putrequest('POST', selector)
325            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
326            h.putheader('Content-Length', '%d' % len(data))
327        else:
328            h.putrequest('GET', selector)
329        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
330        if auth: h.putheader('Authorization', 'Basic %s' % auth)
331        if realhost: h.putheader('Host', realhost)
332        for args in self.addheaders: h.putheader(*args)
333        h.endheaders()
334        if data is not None:
335            h.send(data)
336        errcode, errmsg, headers = h.getreply()
337        fp = h.getfile()
338        if errcode == -1:
339            if fp: fp.close()
340            # something went wrong with the HTTP status line
341            raise IOError, ('http protocol error', 0,
342                            'got a bad status line', None)
343        # According to RFC 2616, "2xx" code indicates that the client's
344        # request was successfully received, understood, and accepted.
345        if (200 <= errcode < 300):
346            return addinfourl(fp, headers, "http:" + url, errcode)
347        else:
348            if data is None:
349                return self.http_error(url, fp, errcode, errmsg, headers)
350            else:
351                return self.http_error(url, fp, errcode, errmsg, headers, data)
352
353    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
354        """Handle http errors.
355        Derived class can override this, or provide specific handlers
356        named http_error_DDD where DDD is the 3-digit error code."""
357        # First check if there's a specific handler for this error
358        name = 'http_error_%d' % errcode
359        if hasattr(self, name):
360            method = getattr(self, name)
361            if data is None:
362                result = method(url, fp, errcode, errmsg, headers)
363            else:
364                result = method(url, fp, errcode, errmsg, headers, data)
365            if result: return result
366        return self.http_error_default(url, fp, errcode, errmsg, headers)
367
368    def http_error_default(self, url, fp, errcode, errmsg, headers):
369        """Default error handler: close the connection and raise IOError."""
370        void = fp.read()
371        fp.close()
372        raise IOError, ('http error', errcode, errmsg, headers)
373
374    if _have_ssl:
375        def open_https(self, url, data=None):
376            """Use HTTPS protocol."""
377
378            import httplib
379            user_passwd = None
380            proxy_passwd = None
381            if isinstance(url, str):
382                host, selector = splithost(url)
383                if host:
384                    user_passwd, host = splituser(host)
385                    host = unquote(host)
386                realhost = host
387            else:
388                host, selector = url
389                # here we determine whether the proxy contains authorization information
390                proxy_passwd, host = splituser(host)
391                urltype, rest = splittype(selector)
392                url = rest
393                user_passwd = None
394                if urltype.lower() != 'https':
395                    realhost = None
396                else:
397                    realhost, rest = splithost(rest)
398                    if realhost:
399                        user_passwd, realhost = splituser(realhost)
400                    if user_passwd:
401                        selector = "%s://%s%s" % (urltype, realhost, rest)
402                #print "proxy via https:", host, selector
403            if not host: raise IOError, ('https error', 'no host given')
404            if proxy_passwd:
405                import base64
406                proxy_auth = base64.b64encode(proxy_passwd).strip()
407            else:
408                proxy_auth = None
409            if user_passwd:
410                import base64
411                auth = base64.b64encode(user_passwd).strip()
412            else:
413                auth = None
414            h = httplib.HTTPS(host, 0,
415                              key_file=self.key_file,
416                              cert_file=self.cert_file)
417            if data is not None:
418                h.putrequest('POST', selector)
419                h.putheader('Content-Type',
420                            'application/x-www-form-urlencoded')
421                h.putheader('Content-Length', '%d' % len(data))
422            else:
423                h.putrequest('GET', selector)
424            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
425            if auth: h.putheader('Authorization', 'Basic %s' % auth)
426            if realhost: h.putheader('Host', realhost)
427            for args in self.addheaders: h.putheader(*args)
428            h.endheaders()
429            if data is not None:
430                h.send(data)
431            errcode, errmsg, headers = h.getreply()
432            fp = h.getfile()
433            if errcode == -1:
434                if fp: fp.close()
435                # something went wrong with the HTTP status line
436                raise IOError, ('http protocol error', 0,
437                                'got a bad status line', None)
438            # According to RFC 2616, "2xx" code indicates that the client's
439            # request was successfully received, understood, and accepted.
440            if (200 <= errcode < 300):
441                return addinfourl(fp, headers, "https:" + url, errcode)
442            else:
443                if data is None:
444                    return self.http_error(url, fp, errcode, errmsg, headers)
445                else:
446                    return self.http_error(url, fp, errcode, errmsg, headers,
447                                           data)
448
449    def open_file(self, url):
450        """Use local file or FTP depending on form of URL."""
451        if not isinstance(url, str):
452            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
453        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
454            return self.open_ftp(url)
455        else:
456            return self.open_local_file(url)
457
458    def open_local_file(self, url):
459        """Use local file."""
460        import mimetypes, mimetools, email.utils
461        try:
462            from cStringIO import StringIO
463        except ImportError:
464            from StringIO import StringIO
465        host, file = splithost(url)
466        localname = url2pathname(file)
467        try:
468            stats = os.stat(localname)
469        except OSError, e:
470            raise IOError(e.errno, e.strerror, e.filename)
471        size = stats.st_size
472        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
473        mtype = mimetypes.guess_type(url)[0]
474        headers = mimetools.Message(StringIO(
475            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
476            (mtype or 'text/plain', size, modified)))
477        if not host:
478            urlfile = file
479            if file[:1] == '/':
480                urlfile = 'file://' + file
481            return addinfourl(open(localname, 'rb'),
482                              headers, urlfile)
483        host, port = splitport(host)
484        if not port \
485           and socket.gethostbyname(host) in (localhost(), thishost()):
486            urlfile = file
487            if file[:1] == '/':
488                urlfile = 'file://' + file
489            return addinfourl(open(localname, 'rb'),
490                              headers, urlfile)
491        raise IOError, ('local file error', 'not on local host')
492
493    def open_ftp(self, url):
494        """Use FTP protocol."""
495        if not isinstance(url, str):
496            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
497        import mimetypes, mimetools
498        try:
499            from cStringIO import StringIO
500        except ImportError:
501            from StringIO import StringIO
502        host, path = splithost(url)
503        if not host: raise IOError, ('ftp error', 'no host given')
504        host, port = splitport(host)
505        user, host = splituser(host)
506        if user: user, passwd = splitpasswd(user)
507        else: passwd = None
508        host = unquote(host)
509        user = unquote(user or '')
510        passwd = unquote(passwd or '')
511        host = socket.gethostbyname(host)
512        if not port:
513            import ftplib
514            port = ftplib.FTP_PORT
515        else:
516            port = int(port)
517        path, attrs = splitattr(path)
518        path = unquote(path)
519        dirs = path.split('/')
520        dirs, file = dirs[:-1], dirs[-1]
521        if dirs and not dirs[0]: dirs = dirs[1:]
522        if dirs and not dirs[0]: dirs[0] = '/'
523        key = user, host, port, '/'.join(dirs)
524        # XXX thread unsafe!
525        if len(self.ftpcache) > MAXFTPCACHE:
526            # Prune the cache, rather arbitrarily
527            for k in self.ftpcache.keys():
528                if k != key:
529                    v = self.ftpcache[k]
530                    del self.ftpcache[k]
531                    v.close()
532        try:
533            if key not in self.ftpcache:
534                self.ftpcache[key] = \
535                    ftpwrapper(user, passwd, host, port, dirs)
536            if not file: type = 'D'
537            else: type = 'I'
538            for attr in attrs:
539                attr, value = splitvalue(attr)
540                if attr.lower() == 'type' and \
541                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
542                    type = value.upper()
543            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
544            mtype = mimetypes.guess_type("ftp:" + url)[0]
545            headers = ""
546            if mtype:
547                headers += "Content-Type: %s\n" % mtype
548            if retrlen is not None and retrlen >= 0:
549                headers += "Content-Length: %d\n" % retrlen
550            headers = mimetools.Message(StringIO(headers))
551            return addinfourl(fp, headers, "ftp:" + url)
552        except ftperrors(), msg:
553            raise IOError, ('ftp error', msg), sys.exc_info()[2]
554
555    def open_data(self, url, data=None):
556        """Use "data" URL."""
557        if not isinstance(url, str):
558            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
559        # ignore POSTed data
560        #
561        # syntax of data URLs:
562        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
563        # mediatype := [ type "/" subtype ] *( ";" parameter )
564        # data      := *urlchar
565        # parameter := attribute "=" value
566        import mimetools
567        try:
568            from cStringIO import StringIO
569        except ImportError:
570            from StringIO import StringIO
571        try:
572            [type, data] = url.split(',', 1)
573        except ValueError:
574            raise IOError, ('data error', 'bad data URL')
575        if not type:
576            type = 'text/plain;charset=US-ASCII'
577        semi = type.rfind(';')
578        if semi >= 0 and '=' not in type[semi:]:
579            encoding = type[semi+1:]
580            type = type[:semi]
581        else:
582            encoding = ''
583        msg = []
584        msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
585                                            time.gmtime(time.time())))
586        msg.append('Content-type: %s' % type)
587        if encoding == 'base64':
588            import base64
589            data = base64.decodestring(data)
590        else:
591            data = unquote(data)
592        msg.append('Content-Length: %d' % len(data))
593        msg.append('')
594        msg.append(data)
595        msg = '\n'.join(msg)
596        f = StringIO(msg)
597        headers = mimetools.Message(f, 0)
598        #f.fileno = None     # needed for addinfourl
599        return addinfourl(f, headers, url)
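
    # Editorial sketch (illustrative only): a "data" URL served by
    # open_data() above; no network access is involved.
    #
    #     f = urlopen('data:text/plain;charset=US-ASCII,Hello%2C%20World')
    #     f.info().gettype()    # -> 'text/plain'
    #     f.read()              # -> 'Hello, World'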
600
601
602class FancyURLopener(URLopener):
603    """Derived class with handlers for errors we can handle (perhaps)."""
604
605    def __init__(self, *args, **kwargs):
606        URLopener.__init__(self, *args, **kwargs)
607        self.auth_cache = {}
608        self.tries = 0
609        self.maxtries = 10
610
611    def http_error_default(self, url, fp, errcode, errmsg, headers):
612        """Default error handling -- don't raise an exception."""
613        return addinfourl(fp, headers, "http:" + url, errcode)
614
615    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
616        """Error 302 -- relocated (temporarily)."""
617        self.tries += 1
618        if self.maxtries and self.tries >= self.maxtries:
619            if hasattr(self, "http_error_500"):
620                meth = self.http_error_500
621            else:
622                meth = self.http_error_default
623            self.tries = 0
624            return meth(url, fp, 500,
625                        "Internal Server Error: Redirect Recursion", headers)
626        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
627                                        data)
628        self.tries = 0
629        return result
630
631    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
632        if 'location' in headers:
633            newurl = headers['location']
634        elif 'uri' in headers:
635            newurl = headers['uri']
636        else:
637            return
638        void = fp.read()
639        fp.close()
640        # In case the server sent a relative URL, join with original:
641        newurl = basejoin(self.type + ":" + url, newurl)
642        return self.open(newurl)
643
644    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
645        """Error 301 -- also relocated (permanently)."""
646        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
647
648    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
649        """Error 303 -- also relocated (essentially identical to 302)."""
650        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
651
652    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
653        """Error 307 -- relocated, but turn POST into error."""
654        if data is None:
655            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
656        else:
657            return self.http_error_default(url, fp, errcode, errmsg, headers)
658
659    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
660        """Error 401 -- authentication required.
661        This function supports Basic authentication only."""
662        if 'www-authenticate' not in headers:
663            URLopener.http_error_default(self, url, fp,
664                                         errcode, errmsg, headers)
665        stuff = headers['www-authenticate']
666        import re
667        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
668        if not match:
669            URLopener.http_error_default(self, url, fp,
670                                         errcode, errmsg, headers)
671        scheme, realm = match.groups()
672        if scheme.lower() != 'basic':
673            URLopener.http_error_default(self, url, fp,
674                                         errcode, errmsg, headers)
675        name = 'retry_' + self.type + '_basic_auth'
676        if data is None:
677            return getattr(self,name)(url, realm)
678        else:
679            return getattr(self,name)(url, realm, data)
680
681    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
682        """Error 407 -- proxy authentication required.
683        This function supports Basic authentication only."""
684        if 'proxy-authenticate' not in headers:
685            URLopener.http_error_default(self, url, fp,
686                                         errcode, errmsg, headers)
687        stuff = headers['proxy-authenticate']
688        import re
689        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
690        if not match:
691            URLopener.http_error_default(self, url, fp,
692                                         errcode, errmsg, headers)
693        scheme, realm = match.groups()
694        if scheme.lower() != 'basic':
695            URLopener.http_error_default(self, url, fp,
696                                         errcode, errmsg, headers)
697        name = 'retry_proxy_' + self.type + '_basic_auth'
698        if data is None:
699            return getattr(self,name)(url, realm)
700        else:
701            return getattr(self,name)(url, realm, data)
702
703    def retry_proxy_http_basic_auth(self, url, realm, data=None):
704        host, selector = splithost(url)
705        newurl = 'http://' + host + selector
706        proxy = self.proxies['http']
707        urltype, proxyhost = splittype(proxy)
708        proxyhost, proxyselector = splithost(proxyhost)
709        i = proxyhost.find('@') + 1
710        proxyhost = proxyhost[i:]
711        user, passwd = self.get_user_passwd(proxyhost, realm, i)
712        if not (user or passwd): return None
713        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
714        self.proxies['http'] = 'http://' + proxyhost + proxyselector
715        if data is None:
716            return self.open(newurl)
717        else:
718            return self.open(newurl, data)
719
720    def retry_proxy_https_basic_auth(self, url, realm, data=None):
721        host, selector = splithost(url)
722        newurl = 'https://' + host + selector
723        proxy = self.proxies['https']
724        urltype, proxyhost = splittype(proxy)
725        proxyhost, proxyselector = splithost(proxyhost)
726        i = proxyhost.find('@') + 1
727        proxyhost = proxyhost[i:]
728        user, passwd = self.get_user_passwd(proxyhost, realm, i)
729        if not (user or passwd): return None
730        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
731        self.proxies['https'] = 'https://' + proxyhost + proxyselector
732        if data is None:
733            return self.open(newurl)
734        else:
735            return self.open(newurl, data)
736
737    def retry_http_basic_auth(self, url, realm, data=None):
738        host, selector = splithost(url)
739        i = host.find('@') + 1
740        host = host[i:]
741        user, passwd = self.get_user_passwd(host, realm, i)
742        if not (user or passwd): return None
743        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
744        newurl = 'http://' + host + selector
745        if data is None:
746            return self.open(newurl)
747        else:
748            return self.open(newurl, data)
749
750    def retry_https_basic_auth(self, url, realm, data=None):
751        host, selector = splithost(url)
752        i = host.find('@') + 1
753        host = host[i:]
754        user, passwd = self.get_user_passwd(host, realm, i)
755        if not (user or passwd): return None
756        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
757        newurl = 'https://' + host + selector
758        if data is None:
759            return self.open(newurl)
760        else:
761            return self.open(newurl, data)
762
763    def get_user_passwd(self, host, realm, clear_cache=0):
764        key = realm + '@' + host.lower()
765        if key in self.auth_cache:
766            if clear_cache:
767                del self.auth_cache[key]
768            else:
769                return self.auth_cache[key]
770        user, passwd = self.prompt_user_passwd(host, realm)
771        if user or passwd: self.auth_cache[key] = (user, passwd)
772        return user, passwd
773
774    def prompt_user_passwd(self, host, realm):
775        """Override this in a GUI environment!"""
776        import getpass
777        try:
778            user = raw_input("Enter username for %s at %s: " % (realm,
779                                                                host))
780            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
781                (user, realm, host))
782            return user, passwd
783        except KeyboardInterrupt:
784            print
785            return None, None
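
    # Editorial sketch (illustrative only): a subclass that supplies
    # credentials without prompting, e.g. for unattended scripts.  The
    # class name and the credential literals are made up.
    #
    #     class NonInteractiveOpener(FancyURLopener):
    #         def prompt_user_passwd(self, host, realm):
    #             return 'user', 'secret'
    #
    #     opener = NonInteractiveOpener()
    #     f = opener.open('http://protected.example.invalid/')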
786
787
788# Utility functions
789
790_localhost = None
791def localhost():
792    """Return the IP address of the magic hostname 'localhost'."""
793    global _localhost
794    if _localhost is None:
795        _localhost = socket.gethostbyname('localhost')
796    return _localhost
797
798_thishost = None
799def thishost():
800    """Return the IP address of the current host."""
801    global _thishost
802    if _thishost is None:
803        _thishost = socket.gethostbyname(socket.gethostname())
804    return _thishost
805
806_ftperrors = None
807def ftperrors():
808    """Return the set of errors raised by the FTP class."""
809    global _ftperrors
810    if _ftperrors is None:
811        import ftplib
812        _ftperrors = ftplib.all_errors
813    return _ftperrors
814
815_noheaders = None
816def noheaders():
817    """Return an empty mimetools.Message object."""
818    global _noheaders
819    if _noheaders is None:
820        import mimetools
821        try:
822            from cStringIO import StringIO
823        except ImportError:
824            from StringIO import StringIO
825        _noheaders = mimetools.Message(StringIO(), 0)
826        _noheaders.fp.close()   # Recycle file descriptor
827    return _noheaders
828
829
830# Utility classes
831
832class ftpwrapper:
833    """Class used by open_ftp() for cache of open FTP connections."""
834
835    def __init__(self, user, passwd, host, port, dirs, timeout=None):
836        self.user = user
837        self.passwd = passwd
838        self.host = host
839        self.port = port
840        self.dirs = dirs
841        self.timeout = timeout
842        self.init()
843
844    def init(self):
845        import ftplib
846        self.busy = 0
847        self.ftp = ftplib.FTP()
848        self.ftp.connect(self.host, self.port, self.timeout)
849        self.ftp.login(self.user, self.passwd)
850        for dir in self.dirs:
851            self.ftp.cwd(dir)
852
853    def retrfile(self, file, type):
854        import ftplib
855        self.endtransfer()
856        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
857        else: cmd = 'TYPE ' + type; isdir = 0
858        try:
859            self.ftp.voidcmd(cmd)
860        except ftplib.all_errors:
861            self.init()
862            self.ftp.voidcmd(cmd)
863        conn = None
864        if file and not isdir:
865            # Try to retrieve as a file
866            try:
867                cmd = 'RETR ' + file
868                conn = self.ftp.ntransfercmd(cmd)
869            except ftplib.error_perm, reason:
870                if str(reason)[:3] != '550':
871                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
872        if not conn:
873            # Set transfer mode to ASCII!
874            self.ftp.voidcmd('TYPE A')
875            # Try a directory listing. Verify that directory exists.
876            if file:
877                pwd = self.ftp.pwd()
878                try:
879                    try:
880                        self.ftp.cwd(file)
881                    except ftplib.error_perm, reason:
882                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
883                finally:
884                    self.ftp.cwd(pwd)
885                cmd = 'LIST ' + file
886            else:
887                cmd = 'LIST'
888            conn = self.ftp.ntransfercmd(cmd)
889        self.busy = 1
890        # Pass back both a suitably decorated object and a retrieval length
891        return (addclosehook(conn[0].makefile('rb'),
892                             self.endtransfer), conn[1])
893    def endtransfer(self):
894        if not self.busy:
895            return
896        self.busy = 0
897        try:
898            self.ftp.voidresp()
899        except ftperrors():
900            pass
901
902    def close(self):
903        self.endtransfer()
904        try:
905            self.ftp.close()
906        except ftperrors():
907            pass
908
909class addbase:
910    """Base class for addinfo and addclosehook."""
911
912    def __init__(self, fp):
913        self.fp = fp
914        self.read = self.fp.read
915        self.readline = self.fp.readline
916        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
917        if hasattr(self.fp, "fileno"):
918            self.fileno = self.fp.fileno
919        else:
920            self.fileno = lambda: None
921        if hasattr(self.fp, "__iter__"):
922            self.__iter__ = self.fp.__iter__
923            if hasattr(self.fp, "next"):
924                self.next = self.fp.next
925
926    def __repr__(self):
927        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
928                                             id(self), self.fp)
929
930    def close(self):
931        self.read = None
932        self.readline = None
933        self.readlines = None
934        self.fileno = None
935        if self.fp: self.fp.close()
936        self.fp = None
937
938class addclosehook(addbase):
939    """Class to add a close hook to an open file."""
940
941    def __init__(self, fp, closehook, *hookargs):
942        addbase.__init__(self, fp)
943        self.closehook = closehook
944        self.hookargs = hookargs
945
946    def close(self):
947        addbase.close(self)
948        if self.closehook:
949            self.closehook(*self.hookargs)
950            self.closehook = None
951            self.hookargs = None
952
953class addinfo(addbase):
954    """Class to add an info() method to an open file."""
955
956    def __init__(self, fp, headers):
957        addbase.__init__(self, fp)
958        self.headers = headers
959
960    def info(self):
961        return self.headers
962
963class addinfourl(addbase):
964    """Class to add info() and geturl() methods to an open file."""
965
966    def __init__(self, fp, headers, url, code=None):
967        addbase.__init__(self, fp)
968        self.headers = headers
969        self.url = url
970        self.code = code
971
972    def info(self):
973        return self.headers
974
975    def getcode(self):
976        return self.code
977
978    def geturl(self):
979        return self.url
980
981
982# Utilities to parse URLs (most of these return None for missing parts):
983# unwrap('<URL:type://host/path>') --> 'type://host/path'
984# splittype('type:opaquestring') --> 'type', 'opaquestring'
985# splithost('//host[:port]/path') --> 'host[:port]', '/path'
986# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
987# splitpasswd('user:passwd') -> 'user', 'passwd'
988# splitport('host:port') --> 'host', 'port'
989# splitquery('/path?query') --> '/path', 'query'
990# splittag('/path#tag') --> '/path', 'tag'
991# splitattr('/path;attr1=value1;attr2=value2;...') ->
992#   '/path', ['attr1=value1', 'attr2=value2', ...]
993# splitvalue('attr=value') --> 'attr', 'value'
994# unquote('abc%20def') -> 'abc def'
995# quote('abc def') -> 'abc%20def')
996
997try:
998    unicode
999except NameError:
1000    def _is_unicode(x):
1001        return 0
1002else:
1003    def _is_unicode(x):
1004        return isinstance(x, unicode)
1005
1006def toBytes(url):
1007    """toBytes(u"URL") --> 'URL'."""
1008    # Most URL schemes require ASCII. If that changes, the conversion
1009    # can be relaxed
1010    if _is_unicode(url):
1011        try:
1012            url = url.encode("ASCII")
1013        except UnicodeError:
1014            raise UnicodeError("URL " + repr(url) +
1015                               " contains non-ASCII characters")
1016    return url
1017
1018def unwrap(url):
1019    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1020    url = url.strip()
1021    if url[:1] == '<' and url[-1:] == '>':
1022        url = url[1:-1].strip()
1023    if url[:4] == 'URL:': url = url[4:].strip()
1024    return url
1025
1026_typeprog = None
1027def splittype(url):
1028    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1029    global _typeprog
1030    if _typeprog is None:
1031        import re
1032        _typeprog = re.compile('^([^/:]+):')
1033
1034    match = _typeprog.match(url)
1035    if match:
1036        scheme = match.group(1)
1037        return scheme.lower(), url[len(scheme) + 1:]
1038    return None, url
1039
1040_hostprog = None
1041def splithost(url):
1042    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1043    global _hostprog
1044    if _hostprog is None:
1045        import re
1046        _hostprog = re.compile('^//([^/?]*)(.*)$')
1047
1048    match = _hostprog.match(url)
1049    if match: return match.group(1, 2)
1050    return None, url
1051
1052_userprog = None
1053def splituser(host):
1054    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1055    global _userprog
1056    if _userprog is None:
1057        import re
1058        _userprog = re.compile('^(.*)@(.*)$')
1059
1060    match = _userprog.match(host)
1061    if match: return map(unquote, match.group(1, 2))
1062    return None, host
1063
1064_passwdprog = None
1065def splitpasswd(user):
1066    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1067    global _passwdprog
1068    if _passwdprog is None:
1069        import re
1070        _passwdprog = re.compile('^([^:]*):(.*)$')
1071
1072    match = _passwdprog.match(user)
1073    if match: return match.group(1, 2)
1074    return user, None
1075
1076# splittag('/path#tag') --> '/path', 'tag'
1077_portprog = None
1078def splitport(host):
1079    """splitport('host:port') --> 'host', 'port'."""
1080    global _portprog
1081    if _portprog is None:
1082        import re
1083        _portprog = re.compile('^(.*):([0-9]+)$')
1084
1085    match = _portprog.match(host)
1086    if match: return match.group(1, 2)
1087    return host, None
1088
1089_nportprog = None
1090def splitnport(host, defport=-1):
1091    """Split host and port, returning numeric port.
1092    Return given default port if no ':' found; defaults to -1.
1093    Return numerical port if a valid number is found after ':'.
1094    Return None if ':' is present but not followed by a valid number."""
1095    global _nportprog
1096    if _nportprog is None:
1097        import re
1098        _nportprog = re.compile('^(.*):(.*)$')
1099
1100    match = _nportprog.match(host)
1101    if match:
1102        host, port = match.group(1, 2)
1103        try:
1104            if not port: raise ValueError, "no digits"
1105            nport = int(port)
1106        except ValueError:
1107            nport = None
1108        return host, nport
1109    return host, defport
1110
1111_queryprog = None
1112def splitquery(url):
1113    """splitquery('/path?query') --> '/path', 'query'."""
1114    global _queryprog
1115    if _queryprog is None:
1116        import re
1117        _queryprog = re.compile('^(.*)\?([^?]*)$')
1118
1119    match = _queryprog.match(url)
1120    if match: return match.group(1, 2)
1121    return url, None
1122
1123_tagprog = None
1124def splittag(url):
1125    """splittag('/path#tag') --> '/path', 'tag'."""
1126    global _tagprog
1127    if _tagprog is None:
1128        import re
1129        _tagprog = re.compile('^(.*)#([^#]*)$')
1130
1131    match = _tagprog.match(url)
1132    if match: return match.group(1, 2)
1133    return url, None
1134
1135def splitattr(url):
1136    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1137        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1138    words = url.split(';')
1139    return words[0], words[1:]
1140
1141_valueprog = None
1142def splitvalue(attr):
1143    """splitvalue('attr=value') --> 'attr', 'value'."""
1144    global _valueprog
1145    if _valueprog is None:
1146        import re
1147        _valueprog = re.compile('^([^=]*)=(.*)$')
1148
1149    match = _valueprog.match(attr)
1150    if match: return match.group(1, 2)
1151    return attr, None
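
# Editorial sketch (illustrative only): decomposing a URL with the helpers
# above.  The URL is a placeholder.
#
#     url = 'http://user:pw@www.example.invalid:8080/path;type=a?q=1#frag'
#     scheme, rest = splittype(url)      # 'http', '//user:pw@...'
#     netloc, path = splithost(rest)     # 'user:pw@www.example.invalid:8080', '/path;type=a?q=1#frag'
#     userinfo, hostport = splituser(netloc)
#     user, pw = splitpasswd(userinfo)   # 'user', 'pw'
#     host, port = splitport(hostport)   # 'www.example.invalid', '8080'
#     path, tag = splittag(path)         # drops '#frag'
#     path, query = splitquery(path)     # drops '?q=1'
#     path, attrs = splitattr(path)      # '/path', ['type=a']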
1152
1153_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1154_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1155
1156def unquote(s):
1157    """unquote('abc%20def') -> 'abc def'."""
1158    res = s.split('%')
1159    for i in xrange(1, len(res)):
1160        item = res[i]
1161        try:
1162            res[i] = _hextochr[item[:2]] + item[2:]
1163        except KeyError:
1164            res[i] = '%' + item
1165        except UnicodeDecodeError:
1166            res[i] = unichr(int(item[:2], 16)) + item[2:]
1167    return "".join(res)
1168
1169def unquote_plus(s):
1170    """unquote_plus('%7e/abc+def') -> '~/abc def'."""
1171    s = s.replace('+', ' ')
1172    return unquote(s)
1173
1174always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1175               'abcdefghijklmnopqrstuvwxyz'
1176               '0123456789' '_.-')
1177_safemaps = {}
1178
1179def quote(s, safe='/'):
1180    """quote('abc def') -> 'abc%20def'
1181
1182    Each part of a URL, e.g. the path info, the query, etc., has a
1183    different set of reserved characters that must be quoted.
1184
1185    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1186    the following reserved characters.
1187
1188    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1189                  "$" | ","
1190
1191    Each of these characters is reserved in some component of a URL,
1192    but not necessarily in all of them.
1193
1194    By default, the quote function is intended for quoting the path
1195    section of a URL.  Thus, it will not encode '/'.  This character
1196    is reserved, but in typical usage the quote function is being
1197    called on a path where the existing slash characters are used as
1198    reserved characters.
1199    """
1200    cachekey = (safe, always_safe)
1201    try:
1202        safe_map = _safemaps[cachekey]
1203    except KeyError:
1204        safe += always_safe
1205        safe_map = {}
1206        for i in range(256):
1207            c = chr(i)
1208            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1209        _safemaps[cachekey] = safe_map
1210    res = map(safe_map.__getitem__, s)
1211    return ''.join(res)
1212
1213def quote_plus(s, safe=''):
1214    """Quote the query fragment of a URL, replacing ' ' with '+'."""
1215    if ' ' in s:
1216        s = quote(s, safe + ' ')
1217        return s.replace(' ', '+')
1218    return quote(s, safe)
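
# Editorial sketch (illustrative only): quoting round trips using the
# functions above.
#
#     quote('abc def/ghi')            # -> 'abc%20def/ghi'  ('/' is safe by default)
#     quote('abc def/ghi', safe='')   # -> 'abc%20def%2Fghi'
#     quote_plus('abc def')           # -> 'abc+def'
#     unquote('abc%20def')            # -> 'abc def'
#     unquote_plus('abc+def')         # -> 'abc def'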
1219
1220def urlencode(query, doseq=0):
1221    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1222
1223    If any values in the query arg are sequences and doseq is true, each
1224    sequence element is converted to a separate parameter.
1225
1226    If the query arg is a sequence of two-element tuples, the order of the
1227    parameters in the output will match the order of parameters in the
1228    input.
1229    """
1230
1231    if hasattr(query,"items"):
1232        # mapping objects
1233        query = query.items()
1234    else:
1235        # it's a bother at times that strings and string-like objects are
1236        # sequences...
1237        try:
1238            # non-sequence items should not work with len()
1239            # non-empty strings will fail this
1240            if len(query) and not isinstance(query[0], tuple):
1241                raise TypeError
1242            # zero-length sequences of all types will get here and succeed,
1243            # but that's a minor nit - since the original implementation
1244            # allowed empty dicts that type of behavior probably should be
1245            # preserved for consistency
1246        except TypeError:
1247            ty,va,tb = sys.exc_info()
1248            raise TypeError, "not a valid non-string sequence or mapping object", tb
1249
1250    l = []
1251    if not doseq:
1252        # preserve old behavior
1253        for k, v in query:
1254            k = quote_plus(str(k))
1255            v = quote_plus(str(v))
1256            l.append(k + '=' + v)
1257    else:
1258        for k, v in query:
1259            k = quote_plus(str(k))
1260            if isinstance(v, str):
1261                v = quote_plus(v)
1262                l.append(k + '=' + v)
1263            elif _is_unicode(v):
1264                # is there a reasonable way to convert to ASCII?
1265                # encode generates a string, but "replace" or "ignore"
1266                # lose information and "strict" can raise UnicodeError
1267                v = quote_plus(v.encode("ASCII","replace"))
1268                l.append(k + '=' + v)
1269            else:
1270                try:
1271                    # is this a sufficient test for sequence-ness?
1272                    x = len(v)
1273                except TypeError:
1274                    # not a sequence
1275                    v = quote_plus(str(v))
1276                    l.append(k + '=' + v)
1277                else:
1278                    # loop over the sequence
1279                    for elt in v:
1280                        l.append(k + '=' + quote_plus(str(elt)))
1281    return '&'.join(l)
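
# Editorial sketch (illustrative only): urlencode() with a mapping and with
# sequence values.  Dict ordering is arbitrary, so the parameter order shown
# for the mapping case is only one possibility.
#
#     urlencode({'q': 'python urllib', 'page': 2})
#     # -> 'q=python+urllib&page=2'
#
#     urlencode([('tag', 'a'), ('tag', 'b')])    # tuple order is preserved
#     # -> 'tag=a&tag=b'
#
#     urlencode({'tag': ['a', 'b']}, doseq=1)
#     # -> 'tag=a&tag=b'
#     urlencode({'tag': ['a', 'b']})             # without doseq: str() of the list, quoted
#     # -> 'tag=%5B%27a%27%2C+%27b%27%5D'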
1282
1283# Proxy handling
1284def getproxies_environment():
1285    """Return a dictionary of scheme -> proxy server URL mappings.
1286
1287    Scan the environment for variables named <scheme>_proxy;
1288    this seems to be the standard convention.  If you need a
1289    different way, you can pass a proxies dictionary to the
1290    [Fancy]URLopener constructor.
1291
1292    """
1293    proxies = {}
1294    for name, value in os.environ.items():
1295        name = name.lower()
1296        if name == 'no_proxy':
1297            # handled in proxy_bypass_environment
1298            continue
1299        if value and name[-6:] == '_proxy':
1300            proxies[name[:-6]] = value
1301    return proxies
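
# Editorial sketch (illustrative only): what getproxies_environment() returns
# for a hypothetical environment.
#
#     os.environ['http_proxy'] = 'http://proxy.example.invalid:3128'
#     os.environ['ftp_proxy'] = 'http://proxy.example.invalid:3128'
#     getproxies_environment()
#     # -> {'http': 'http://proxy.example.invalid:3128',
#     #     'ftp': 'http://proxy.example.invalid:3128'}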
1302
1303def proxy_bypass_environment(host):
1304    """Test if proxies should not be used for a particular host.
1305
1306    Checks the environment for a variable named no_proxy, which should
1307    be a list of DNS suffixes separated by commas, or '*' for all hosts.
1308    """
1309    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1310    # '*' is a special case meaning always bypass
1311    if no_proxy == '*':
1312        return 1
1313    # strip port off host
1314    hostonly, port = splitport(host)
1315    # check if the host ends with any of the DNS suffixes
1316    for name in no_proxy.split(','):
1317        if name and (hostonly.endswith(name) or host.endswith(name)):
1318            return 1
1319    # otherwise, don't bypass
1320    return 0
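
# Editorial sketch (illustrative only): no_proxy handling, assuming the
# hypothetical environment below.
#
#     os.environ['no_proxy'] = 'localhost,.internal.example.invalid'
#     proxy_bypass_environment('localhost:8000')                 # -> 1
#     proxy_bypass_environment('db.internal.example.invalid')    # -> 1
#     proxy_bypass_environment('www.python.org')                 # -> 0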
1321
1322
1323if sys.platform == 'darwin':
1324    def getproxies_internetconfig():
1325        """Return a dictionary of scheme -> proxy server URL mappings.
1326
1327        By convention the Mac uses Internet Config to store
1328        proxies.  An HTTP proxy, for instance, is stored under
1329        the HttpProxy key.
1330
1331        """
1332        try:
1333            import ic
1334        except ImportError:
1335            return {}
1336
1337        try:
1338            config = ic.IC()
1339        except ic.error:
1340            return {}
1341        proxies = {}
1342        # HTTP:
1343        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1344            try:
1345                value = config['HTTPProxyHost']
1346            except ic.error:
1347                pass
1348            else:
1349                proxies['http'] = 'http://%s' % value
1350        # FTP: XXX To be done.
1351        # Gopher: XXX To be done.
1352        return proxies
1353
1354    def proxy_bypass(host):
1355        if getproxies_environment():
1356            return proxy_bypass_environment(host)
1357        else:
1358            return 0
1359
1360    def getproxies():
1361        return getproxies_environment() or getproxies_internetconfig()
1362
1363elif os.name == 'nt':
1364    def getproxies_registry():
1365        """Return a dictionary of scheme -> proxy server URL mappings.
1366
1367        Win32 uses the registry to store proxies.
1368
1369        """
1370        proxies = {}
1371        try:
1372            import _winreg
1373        except ImportError:
1374            # Std module, so should be around - but you never know!
1375            return proxies
1376        try:
1377            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1378                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1379            proxyEnable = _winreg.QueryValueEx(internetSettings,
1380                                               'ProxyEnable')[0]
1381            if proxyEnable:
1382                # Returned as Unicode, but causes problems if not converted to ASCII
1383                proxyServer = str(_winreg.QueryValueEx(internetSettings,
1384                                                       'ProxyServer')[0])
1385                if '=' in proxyServer:
1386                    # Per-protocol settings
1387                    for p in proxyServer.split(';'):
1388                        protocol, address = p.split('=', 1)
1389                        # See if address has a type:// prefix
1390                        import re
1391                        if not re.match('^([^/:]+)://', address):
1392                            address = '%s://%s' % (protocol, address)
1393                        proxies[protocol] = address
1394                else:
1395                    # Use one setting for all protocols
1396                    if proxyServer[:5] == 'http:':
1397                        proxies['http'] = proxyServer
1398                    else:
1399                        proxies['http'] = 'http://%s' % proxyServer
1400                        proxies['ftp'] = 'ftp://%s' % proxyServer
1401            internetSettings.Close()
1402        except (WindowsError, ValueError, TypeError):
1403            # Either the registry key was not found, or the value was in an
1404            # unexpected format.
1405            # proxies already set up to be empty so nothing to do
1406            pass
1407        return proxies
1408
1409    def getproxies():
1410        """Return a dictionary of scheme -> proxy server URL mappings.
1411
1412        Returns settings gathered from the environment, if specified,
1413        or the registry.
1414
1415        """
1416        return getproxies_environment() or getproxies_registry()
1417
1418    def proxy_bypass_registry(host):
1419        try:
1420            import _winreg
1421            import re
1422        except ImportError:
1423            # Std modules, so should be around - but you never know!
1424            return 0
1425        try:
1426            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1427                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1428            proxyEnable = _winreg.QueryValueEx(internetSettings,
1429                                               'ProxyEnable')[0]
1430            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1431                                                     'ProxyOverride')[0])
1432            # ^^^^ Returned as Unicode, but causes problems if not converted to ASCII
1433        except WindowsError:
1434            return 0
1435        if not proxyEnable or not proxyOverride:
1436            return 0
1437        # try to make a host list from name and IP address.
1438        rawHost, port = splitport(host)
1439        host = [rawHost]
1440        try:
1441            addr = socket.gethostbyname(rawHost)
1442            if addr != rawHost:
1443                host.append(addr)
1444        except socket.error:
1445            pass
1446        try:
1447            fqdn = socket.getfqdn(rawHost)
1448            if fqdn != rawHost:
1449                host.append(fqdn)
1450        except socket.error:
1451            pass
1452        # make a check value list from the registry entry: replace the
1453        # '<local>' string by the localhost entry and the corresponding
1454        # canonical entry.
1455        proxyOverride = proxyOverride.split(';')
1456        i = 0
1457        while i < len(proxyOverride):
1458            if proxyOverride[i] == '<local>':
1459                proxyOverride[i:i+1] = ['localhost',
1460                                        '127.0.0.1',
1461                                        socket.gethostname(),
1462                                        socket.gethostbyname(
1463                                            socket.gethostname())]
1464            i += 1
1465        # print proxyOverride
1466        # now check if we match one of the registry values.
1467        for test in proxyOverride:
1468            test = test.replace(".", r"\.")     # mask dots
1469            test = test.replace("*", r".*")     # change glob sequence
1470            test = test.replace("?", r".")      # change glob char
1471            for val in host:
1472                # print "%s <--> %s" %( test, val )
1473                if re.match(test, val, re.I):
1474                    return 1
1475        return 0
1476
1477    def proxy_bypass(host):
1478        """Return true if the proxy should be bypassed for the given host.
1479
1480        Consults the environment settings, if any are specified,
1481        otherwise the registry.
1482
1483        """
1484        if getproxies_environment():
1485            return proxy_bypass_environment(host)
1486        else:
1487            return proxy_bypass_registry(host)
1488
1489else:
1490    # By default use environment variables
1491    getproxies = getproxies_environment
1492    proxy_bypass = proxy_bypass_environment
1493
1494# Test and time quote() and unquote()
1495def test1():
1496    s = ''
1497    for i in range(256): s = s + chr(i)
1498    s = s*4
1499    t0 = time.time()
1500    qs = quote(s)
1501    uqs = unquote(qs)
1502    t1 = time.time()
1503    if uqs != s:
1504        print 'Wrong!'
1505    print repr(s)
1506    print repr(qs)
1507    print repr(uqs)
1508    print round(t1 - t0, 3), 'sec'
1509
1510
1511def reporthook(blocknum, blocksize, totalsize):
1512    # Report during remote transfers
1513    print "Block number: %d, Block size: %d, Total size: %d" % (
1514        blocknum, blocksize, totalsize)
1515
1516# Test program
1517def test(args=[]):
1518    if not args:
1519        args = [
1520            '/etc/passwd',
1521            'file:/etc/passwd',
1522            'file://localhost/etc/passwd',
1523            'ftp://ftp.gnu.org/pub/README',
1524            'http://www.python.org/index.html',
1525            ]
1526        if hasattr(URLopener, "open_https"):
1527            args.append('https://synergy.as.cmu.edu/~geek/')
1528    try:
1529        for url in args:
1530            print '-'*10, url, '-'*10
1531            fn, h = urlretrieve(url, None, reporthook)
1532            print fn
1533            if h:
1534                print '======'
1535                for k in h.keys(): print k + ':', h[k]
1536                print '======'
1537            fp = open(fn, 'rb')
1538            data = fp.read()
1539            del fp
1540            if '\r' in data:
1541                table = string.maketrans("", "")
1542                data = data.translate(table, "\r")
1543            print data
1544            fn, h = None, None
1545        print '-'*40
1546    finally:
1547        urlcleanup()
1548
1549def main():
1550    import getopt, sys
1551    try:
1552        opts, args = getopt.getopt(sys.argv[1:], "th")
1553    except getopt.error, msg:
1554        print msg
1555        print "Use -h for help"
1556        return
1557    t = 0
1558    for o, a in opts:
1559        if o == '-t':
1560            t = t + 1
1561        if o == '-h':
1562            print "Usage: python urllib.py [-t] [url ...]"
1563            print "-t runs self-test;",
1564            print "otherwise, contents of urls are printed"
1565            return
1566    if t:
1567        if t > 1:
1568            test1()
1569        test(args)
1570    else:
1571        if not args:
1572            print "Use -h for help"
1573        for url in args:
1574            print urlopen(url).read(),
1575
1576# Run test program when run as a script
1577if __name__ == '__main__':
1578    main()
1579