urllib.py revision 3e86595280a9714354372f91108be4d184da8a5f
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30from urlparse import urljoin as basejoin
31
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34           "urlencode", "url2pathname", "pathname2url", "splittag",
35           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37           "splitnport", "splitquery", "splitattr", "splitvalue",
38           "splitgophertype", "getproxies"]
39
40__version__ = '1.17'    # XXX This version is not always updated :-(
41
42MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43
44# Helper for non-unix systems
# Select the URL <-> path converters for the running platform.  Every
# branch defines the same two names, url2pathname and pathname2url.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # All other platforms: the conversion is just unquote() / quote().
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
61
62# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64#     (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
# Opener shared by urlopen(), urlretrieve() and urlcleanup(); created lazily.
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When proxies is given, a fresh FancyURLopener using that mapping is
    created for this call only.  Otherwise the module-wide opener is
    used (and created on first use).  A non-None data is handed on to
    the opener's open() method.
    """
    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the module-wide opener.

    Creates the shared FancyURLopener on first use and returns whatever
    its retrieve() method returns for the same arguments.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the module-wide opener, if one has been created."""
    if _urlopener:
        _urlopener.cleanup()
93
94# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError raised by URLopener.retrieve() for an incomplete download.

    The 'content' attribute holds the second constructor argument;
    retrieve() passes its (filename, headers) result there.
    """
    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content
99
100ftpcache = {}
101class URLopener:
102    """Class to open URLs.
103    This is a class rather than just a subroutine because we may need
104    more than one set of global protocol-specific options.
105    Note -- this is a base class for those who don't want the
106    automatic handling of errors type 302 (relocated) and 401
107    (authorization needed)."""
108
109    __tempfiles = None
110
111    version = "Python-urllib/%s" % __version__
112
113    # Constructor
114    def __init__(self, proxies=None, **x509):
115        if proxies is None:
116            proxies = getproxies()
117        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
118        self.proxies = proxies
119        self.key_file = x509.get('key_file')
120        self.cert_file = x509.get('cert_file')
121        self.addheaders = [('User-agent', self.version)]
122        self.__tempfiles = []
123        self.__unlink = os.unlink # See cleanup()
124        self.tempcache = None
125        # Undocumented feature: if you assign {} to tempcache,
126        # it is used to cache files retrieved with
127        # self.retrieve().  This is not enabled by default
128        # since it does not work for changing documents (and I
129        # haven't got the logic to check expiration headers
130        # yet).
131        self.ftpcache = ftpcache
132        # Undocumented feature: you can use a different
133        # ftp cache by assigning to the .ftpcache member;
134        # in case you want logically independent URL openers
135        # XXX This is not threadsafe.  Bah.
136
    def __del__(self):
        """Release resources via close() when the opener is destroyed."""
        self.close()
139
    def close(self):
        """Close the opener; currently this just calls cleanup()."""
        self.cleanup()
142
143    def cleanup(self):
144        # This code sometimes runs when the rest of this module
145        # has already been deleted, so it can't use any globals
146        # or import anything.
147        if self.__tempfiles:
148            for file in self.__tempfiles:
149                try:
150                    self.__unlink(file)
151                except OSError:
152                    pass
153            del self.__tempfiles[:]
154        if self.tempcache:
155            self.tempcache.clear()
156
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')

        The positional arguments are appended, as one tuple, to
        self.addheaders; open_http()/open_https() later pass each tuple
        to putheader().
        """
        self.addheaders.append(args)
161
162    # External interface
163    def open(self, fullurl, data=None):
164        """Use URLopener().open(file) instead of open(file, 'r')."""
165        fullurl = unwrap(toBytes(fullurl))
166        if self.tempcache and fullurl in self.tempcache:
167            filename, headers = self.tempcache[fullurl]
168            fp = open(filename, 'rb')
169            return addinfourl(fp, headers, fullurl)
170        urltype, url = splittype(fullurl)
171        if not urltype:
172            urltype = 'file'
173        if urltype in self.proxies:
174            proxy = self.proxies[urltype]
175            urltype, proxyhost = splittype(proxy)
176            host, selector = splithost(proxyhost)
177            url = (host, fullurl) # Signal special case to open_*()
178        else:
179            proxy = None
180        name = 'open_' + urltype
181        self.type = urltype
182        name = name.replace('-', '_')
183        if not hasattr(self, name):
184            if proxy:
185                return self.open_unknown_proxy(proxy, fullurl, data)
186            else:
187                return self.open_unknown(fullurl, data)
188        try:
189            if data is None:
190                return getattr(self, name)(url)
191            else:
192                return getattr(self, name)(url, data)
193        except socket.error, msg:
194            raise IOError, ('socket error', msg), sys.exc_info()[2]
195
196    def open_unknown(self, fullurl, data=None):
197        """Overridable interface to open unknown URL type."""
198        type, url = splittype(fullurl)
199        raise IOError, ('url error', 'unknown url type', type)
200
201    def open_unknown_proxy(self, proxy, fullurl, data=None):
202        """Overridable interface to open unknown URL type."""
203        type, url = splittype(fullurl)
204        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
205
206    # External interface
207    def retrieve(self, url, filename=None, reporthook=None, data=None):
208        """retrieve(url) returns (filename, headers) for a local object
209        or (tempfilename, headers) for a remote object."""
210        url = unwrap(toBytes(url))
211        if self.tempcache and url in self.tempcache:
212            return self.tempcache[url]
213        type, url1 = splittype(url)
214        if filename is None and (not type or type == 'file'):
215            try:
216                fp = self.open_local_file(url1)
217                hdrs = fp.info()
218                del fp
219                return url2pathname(splithost(url1)[1]), hdrs
220            except IOError, msg:
221                pass
222        fp = self.open(url, data)
223        headers = fp.info()
224        if filename:
225            tfp = open(filename, 'wb')
226        else:
227            import tempfile
228            garbage, path = splittype(url)
229            garbage, path = splithost(path or "")
230            path, garbage = splitquery(path or "")
231            path, garbage = splitattr(path or "")
232            suffix = os.path.splitext(path)[1]
233            (fd, filename) = tempfile.mkstemp(suffix)
234            self.__tempfiles.append(filename)
235            tfp = os.fdopen(fd, 'wb')
236        result = filename, headers
237        if self.tempcache is not None:
238            self.tempcache[url] = result
239        bs = 1024*8
240        size = -1
241        read = 0
242        blocknum = 0
243        if reporthook:
244            if "content-length" in headers:
245                size = int(headers["Content-Length"])
246            reporthook(blocknum, bs, size)
247        while 1:
248            block = fp.read(bs)
249            if block == "":
250                break
251            read += len(block)
252            tfp.write(block)
253            blocknum += 1
254            if reporthook:
255                reporthook(blocknum, bs, size)
256        fp.close()
257        tfp.close()
258        del fp
259        del tfp
260
261        # raise exception if actual size does not match content-length header
262        if size >= 0 and read < size:
263            raise ContentTooShortError("retrieval incomplete: got only %i out "
264                                       "of %i bytes" % (read, size), result)
265
266        return result
267
268    # Each method named open_<type> knows how to open that type of URL
269
270    def open_http(self, url, data=None):
271        """Use HTTP protocol."""
272        import httplib
273        user_passwd = None
274        proxy_passwd= None
275        if isinstance(url, str):
276            host, selector = splithost(url)
277            if host:
278                user_passwd, host = splituser(host)
279                host = unquote(host)
280            realhost = host
281        else:
282            host, selector = url
283            # check whether the proxy contains authorization information
284            proxy_passwd, host = splituser(host)
285            # now we proceed with the url we want to obtain
286            urltype, rest = splittype(selector)
287            url = rest
288            user_passwd = None
289            if urltype.lower() != 'http':
290                realhost = None
291            else:
292                realhost, rest = splithost(rest)
293                if realhost:
294                    user_passwd, realhost = splituser(realhost)
295                if user_passwd:
296                    selector = "%s://%s%s" % (urltype, realhost, rest)
297                if proxy_bypass(realhost):
298                    host = realhost
299
300            #print "proxy via http:", host, selector
301        if not host: raise IOError, ('http error', 'no host given')
302
303        if proxy_passwd:
304            import base64
305            proxy_auth = base64.encodestring(proxy_passwd).strip()
306        else:
307            proxy_auth = None
308
309        if user_passwd:
310            import base64
311            auth = base64.encodestring(user_passwd).strip()
312        else:
313            auth = None
314        h = httplib.HTTP(host)
315        if data is not None:
316            h.putrequest('POST', selector)
317            h.putheader('Content-type', 'application/x-www-form-urlencoded')
318            h.putheader('Content-length', '%d' % len(data))
319        else:
320            h.putrequest('GET', selector)
321        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
322        if auth: h.putheader('Authorization', 'Basic %s' % auth)
323        if realhost: h.putheader('Host', realhost)
324        for args in self.addheaders: h.putheader(*args)
325        h.endheaders()
326        if data is not None:
327            h.send(data)
328        errcode, errmsg, headers = h.getreply()
329        fp = h.getfile()
330        if errcode == 200:
331            return addinfourl(fp, headers, "http:" + url)
332        else:
333            if data is None:
334                return self.http_error(url, fp, errcode, errmsg, headers)
335            else:
336                return self.http_error(url, fp, errcode, errmsg, headers, data)
337
338    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
339        """Handle http errors.
340        Derived class can override this, or provide specific handlers
341        named http_error_DDD where DDD is the 3-digit error code."""
342        # First check if there's a specific handler for this error
343        name = 'http_error_%d' % errcode
344        if hasattr(self, name):
345            method = getattr(self, name)
346            if data is None:
347                result = method(url, fp, errcode, errmsg, headers)
348            else:
349                result = method(url, fp, errcode, errmsg, headers, data)
350            if result: return result
351        return self.http_error_default(url, fp, errcode, errmsg, headers)
352
353    def http_error_default(self, url, fp, errcode, errmsg, headers):
354        """Default error handler: close the connection and raise IOError."""
355        void = fp.read()
356        fp.close()
357        raise IOError, ('http error', errcode, errmsg, headers)
358
359    if hasattr(socket, "ssl"):
360        def open_https(self, url, data=None):
361            """Use HTTPS protocol."""
362            import httplib
363            user_passwd = None
364            proxy_passwd = None
365            if isinstance(url, str):
366                host, selector = splithost(url)
367                if host:
368                    user_passwd, host = splituser(host)
369                    host = unquote(host)
370                realhost = host
371            else:
372                host, selector = url
373                # here, we determine, whether the proxy contains authorization information
374                proxy_passwd, host = splituser(host)
375                urltype, rest = splittype(selector)
376                url = rest
377                user_passwd = None
378                if urltype.lower() != 'https':
379                    realhost = None
380                else:
381                    realhost, rest = splithost(rest)
382                    if realhost:
383                        user_passwd, realhost = splituser(realhost)
384                    if user_passwd:
385                        selector = "%s://%s%s" % (urltype, realhost, rest)
386                #print "proxy via https:", host, selector
387            if not host: raise IOError, ('https error', 'no host given')
388            if proxy_passwd:
389                import base64
390                proxy_auth = base64.encodestring(proxy_passwd).strip()
391            else:
392                proxy_auth = None
393            if user_passwd:
394                import base64
395                auth = base64.encodestring(user_passwd).strip()
396            else:
397                auth = None
398            h = httplib.HTTPS(host, 0,
399                              key_file=self.key_file,
400                              cert_file=self.cert_file)
401            if data is not None:
402                h.putrequest('POST', selector)
403                h.putheader('Content-type',
404                            'application/x-www-form-urlencoded')
405                h.putheader('Content-length', '%d' % len(data))
406            else:
407                h.putrequest('GET', selector)
408            if proxy_auth: h.putheader('Proxy-Authorization: Basic %s' % proxy_auth)
409            if auth: h.putheader('Authorization: Basic %s' % auth)
410            if realhost: h.putheader('Host', realhost)
411            for args in self.addheaders: h.putheader(*args)
412            h.endheaders()
413            if data is not None:
414                h.send(data)
415            errcode, errmsg, headers = h.getreply()
416            fp = h.getfile()
417            if errcode == 200:
418                return addinfourl(fp, headers, "https:" + url)
419            else:
420                if data is None:
421                    return self.http_error(url, fp, errcode, errmsg, headers)
422                else:
423                    return self.http_error(url, fp, errcode, errmsg, headers,
424                                           data)
425
426    def open_gopher(self, url):
427        """Use Gopher protocol."""
428        if not isinstance(url, str):
429            raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
430        import gopherlib
431        host, selector = splithost(url)
432        if not host: raise IOError, ('gopher error', 'no host given')
433        host = unquote(host)
434        type, selector = splitgophertype(selector)
435        selector, query = splitquery(selector)
436        selector = unquote(selector)
437        if query:
438            query = unquote(query)
439            fp = gopherlib.send_query(selector, query, host)
440        else:
441            fp = gopherlib.send_selector(selector, host)
442        return addinfourl(fp, noheaders(), "gopher:" + url)
443
444    def open_file(self, url):
445        if not isinstance(url, str):
446            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
447        """Use local file or FTP depending on form of URL."""
448        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
449            return self.open_ftp(url)
450        else:
451            return self.open_local_file(url)
452
453    def open_local_file(self, url):
454        """Use local file."""
455        import mimetypes, mimetools, email.Utils
456        try:
457            from cStringIO import StringIO
458        except ImportError:
459            from StringIO import StringIO
460        host, file = splithost(url)
461        localname = url2pathname(file)
462        try:
463            stats = os.stat(localname)
464        except OSError, e:
465            raise IOError(e.errno, e.strerror, e.filename)
466        size = stats.st_size
467        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
468        mtype = mimetypes.guess_type(url)[0]
469        headers = mimetools.Message(StringIO(
470            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
471            (mtype or 'text/plain', size, modified)))
472        if not host:
473            urlfile = file
474            if file[:1] == '/':
475                urlfile = 'file://' + file
476            return addinfourl(open(localname, 'rb'),
477                              headers, urlfile)
478        host, port = splitport(host)
479        if not port \
480           and socket.gethostbyname(host) in (localhost(), thishost()):
481            urlfile = file
482            if file[:1] == '/':
483                urlfile = 'file://' + file
484            return addinfourl(open(localname, 'rb'),
485                              headers, urlfile)
486        raise IOError, ('local file error', 'not on local host')
487
488    def open_ftp(self, url):
489        """Use FTP protocol."""
490        if not isinstance(url, str):
491            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
492        import mimetypes, mimetools
493        try:
494            from cStringIO import StringIO
495        except ImportError:
496            from StringIO import StringIO
497        host, path = splithost(url)
498        if not host: raise IOError, ('ftp error', 'no host given')
499        host, port = splitport(host)
500        user, host = splituser(host)
501        if user: user, passwd = splitpasswd(user)
502        else: passwd = None
503        host = unquote(host)
504        user = unquote(user or '')
505        passwd = unquote(passwd or '')
506        host = socket.gethostbyname(host)
507        if not port:
508            import ftplib
509            port = ftplib.FTP_PORT
510        else:
511            port = int(port)
512        path, attrs = splitattr(path)
513        path = unquote(path)
514        dirs = path.split('/')
515        dirs, file = dirs[:-1], dirs[-1]
516        if dirs and not dirs[0]: dirs = dirs[1:]
517        if dirs and not dirs[0]: dirs[0] = '/'
518        key = user, host, port, '/'.join(dirs)
519        # XXX thread unsafe!
520        if len(self.ftpcache) > MAXFTPCACHE:
521            # Prune the cache, rather arbitrarily
522            for k in self.ftpcache.keys():
523                if k != key:
524                    v = self.ftpcache[k]
525                    del self.ftpcache[k]
526                    v.close()
527        try:
528            if not key in self.ftpcache:
529                self.ftpcache[key] = \
530                    ftpwrapper(user, passwd, host, port, dirs)
531            if not file: type = 'D'
532            else: type = 'I'
533            for attr in attrs:
534                attr, value = splitvalue(attr)
535                if attr.lower() == 'type' and \
536                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
537                    type = value.upper()
538            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
539            mtype = mimetypes.guess_type("ftp:" + url)[0]
540            headers = ""
541            if mtype:
542                headers += "Content-Type: %s\n" % mtype
543            if retrlen is not None and retrlen >= 0:
544                headers += "Content-Length: %d\n" % retrlen
545            headers = mimetools.Message(StringIO(headers))
546            return addinfourl(fp, headers, "ftp:" + url)
547        except ftperrors(), msg:
548            raise IOError, ('ftp error', msg), sys.exc_info()[2]
549
550    def open_data(self, url, data=None):
551        """Use "data" URL."""
552        if not isinstance(url, str):
553            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
554        # ignore POSTed data
555        #
556        # syntax of data URLs:
557        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
558        # mediatype := [ type "/" subtype ] *( ";" parameter )
559        # data      := *urlchar
560        # parameter := attribute "=" value
561        import mimetools
562        try:
563            from cStringIO import StringIO
564        except ImportError:
565            from StringIO import StringIO
566        try:
567            [type, data] = url.split(',', 1)
568        except ValueError:
569            raise IOError, ('data error', 'bad data URL')
570        if not type:
571            type = 'text/plain;charset=US-ASCII'
572        semi = type.rfind(';')
573        if semi >= 0 and '=' not in type[semi:]:
574            encoding = type[semi+1:]
575            type = type[:semi]
576        else:
577            encoding = ''
578        msg = []
579        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
580                                            time.gmtime(time.time())))
581        msg.append('Content-type: %s' % type)
582        if encoding == 'base64':
583            import base64
584            data = base64.decodestring(data)
585        else:
586            data = unquote(data)
587        msg.append('Content-length: %d' % len(data))
588        msg.append('')
589        msg.append(data)
590        msg = '\n'.join(msg)
591        f = StringIO(msg)
592        headers = mimetools.Message(f, 0)
593        #f.fileno = None     # needed for addinfourl
594        return addinfourl(f, headers, url)
595
596
597class FancyURLopener(URLopener):
598    """Derived class with handlers for errors we can handle (perhaps)."""
599
    def __init__(self, *args, **kwargs):
        """Initialize like URLopener and add authentication/redirect state."""
        URLopener.__init__(self, *args, **kwargs)
        # (user, passwd) tuples, keyed by 'realm@host'; see get_user_passwd().
        self.auth_cache = {}
        # Redirect counter and its limit; see http_error_302().
        self.tries = 0
        self.maxtries = 10
605
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception.

        Returns the error reply wrapped in an addinfourl object.
        NOTE(review): the URL passed on is always prefixed with "http:",
        also when the request was made via open_https() -- confirm that
        this is intended.
        """
        return addinfourl(fp, headers, "http:" + url)
609
610    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
611        """Error 302 -- relocated (temporarily)."""
612        self.tries += 1
613        if self.maxtries and self.tries >= self.maxtries:
614            if hasattr(self, "http_error_500"):
615                meth = self.http_error_500
616            else:
617                meth = self.http_error_default
618            self.tries = 0
619            return meth(url, fp, 500,
620                        "Internal Server Error: Redirect Recursion", headers)
621        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
622                                        data)
623        self.tries = 0
624        return result
625
626    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
627        if 'location' in headers:
628            newurl = headers['location']
629        elif 'uri' in headers:
630            newurl = headers['uri']
631        else:
632            return
633        void = fp.read()
634        fp.close()
635        # In case the server sent a relative URL, join with original:
636        newurl = basejoin(self.type + ":" + url, newurl)
637        return self.open(newurl)
638
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently).

        Handled exactly like a 302 by delegating to http_error_302().
        """
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
642
    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302).

        Delegates to http_error_302().
        """
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
646
647    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
648        """Error 307 -- relocated, but turn POST into error."""
649        if data is None:
650            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
651        else:
652            return self.http_error_default(url, fp, errcode, errmsg, headers)
653
654    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
655        """Error 401 -- authentication required.
656        This function supports Basic authentication only."""
657        if not 'www-authenticate' in headers:
658            URLopener.http_error_default(self, url, fp,
659                                         errcode, errmsg, headers)
660        stuff = headers['www-authenticate']
661        import re
662        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
663        if not match:
664            URLopener.http_error_default(self, url, fp,
665                                         errcode, errmsg, headers)
666        scheme, realm = match.groups()
667        if scheme.lower() != 'basic':
668            URLopener.http_error_default(self, url, fp,
669                                         errcode, errmsg, headers)
670        name = 'retry_' + self.type + '_basic_auth'
671        if data is None:
672            return getattr(self,name)(url, realm)
673        else:
674            return getattr(self,name)(url, realm, data)
675
676    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
677        """Error 407 -- proxy authentication required.
678        This function supports Basic authentication only."""
679        if not 'proxy-authenticate' in headers:
680            URLopener.http_error_default(self, url, fp,
681                                         errcode, errmsg, headers)
682        stuff = headers['proxy-authenticate']
683        import re
684        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
685        if not match:
686            URLopener.http_error_default(self, url, fp,
687                                         errcode, errmsg, headers)
688        scheme, realm = match.groups()
689        if scheme.lower() != 'basic':
690            URLopener.http_error_default(self, url, fp,
691                                         errcode, errmsg, headers)
692        name = 'retry_proxy_' + self.type + '_basic_auth'
693        if data is None:
694            return getattr(self,name)(url, realm)
695        else:
696            return getattr(self,name)(url, realm, data)
697
698    def retry_proxy_http_basic_auth(self, url, realm, data=None):
699        host, selector = splithost(url)
700        newurl = 'http://' + host + selector
701        proxy = self.proxies['http']
702        urltype, proxyhost = splittype(proxy)
703        proxyhost, proxyselector = splithost(proxyhost)
704        i = proxyhost.find('@') + 1
705        proxyhost = proxyhost[i:]
706        user, passwd = self.get_user_passwd(proxyhost, realm, i)
707        if not (user or passwd): return None
708        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
709        self.proxies['http'] = 'http://' + proxyhost + proxyselector
710        if data is None:
711            return self.open(newurl)
712        else:
713            return self.open(newurl, data)
714
715    def retry_proxy_https_basic_auth(self, url, realm, data=None):
716        host, selector = splithost(url)
717        newurl = 'https://' + host + selector
718        proxy = self.proxies['https']
719        urltype, proxyhost = splittype(proxy)
720        proxyhost, proxyselector = splithost(proxyhost)
721        i = proxyhost.find('@') + 1
722        proxyhost = proxyhost[i:]
723        user, passwd = self.get_user_passwd(proxyhost, realm, i)
724        if not (user or passwd): return None
725        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
726        self.proxies['https'] = 'https://' + proxyhost + proxyselector
727        if data is None:
728            return self.open(newurl)
729        else:
730            return self.open(newurl, data)
731
732    def retry_http_basic_auth(self, url, realm, data=None):
733        host, selector = splithost(url)
734        i = host.find('@') + 1
735        host = host[i:]
736        user, passwd = self.get_user_passwd(host, realm, i)
737        if not (user or passwd): return None
738        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
739        newurl = 'http://' + host + selector
740        if data is None:
741            return self.open(newurl)
742        else:
743            return self.open(newurl, data)
744
745    def retry_https_basic_auth(self, url, realm, data=None):
746        host, selector = splithost(url)
747        i = host.find('@') + 1
748        host = host[i:]
749        user, passwd = self.get_user_passwd(host, realm, i)
750        if not (user or passwd): return None
751        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
752        newurl = 'https://' + host + selector
753        if data is None:
754            return self.open(newurl)
755        else:
756            return self.open(newurl, data)
757
758    def get_user_passwd(self, host, realm, clear_cache = 0):
759        key = realm + '@' + host.lower()
760        if key in self.auth_cache:
761            if clear_cache:
762                del self.auth_cache[key]
763            else:
764                return self.auth_cache[key]
765        user, passwd = self.prompt_user_passwd(host, realm)
766        if user or passwd: self.auth_cache[key] = (user, passwd)
767        return user, passwd
768
769    def prompt_user_passwd(self, host, realm):
770        """Override this in a GUI environment!"""
771        import getpass
772        try:
773            user = raw_input("Enter username for %s at %s: " % (realm,
774                                                                host))
775            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
776                (user, realm, host))
777            return user, passwd
778        except KeyboardInterrupt:
779            print
780            return None, None
781
782
783# Utility functions
784
_localhost = None
def localhost():
    """Return the IP address of 'localhost' (looked up once, then cached)."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
792
_thishost = None
def thishost():
    """Return the IP address of this host (looked up once, then cached)."""
    global _thishost
    if _thishost is not None:
        return _thishost
    hostname = socket.gethostname()
    _thishost = socket.gethostbyname(hostname)
    return _thishost
800
_ftperrors = None
def ftperrors():
    """Return ftplib.all_errors, importing ftplib only on first use."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
809
810_noheaders = None
811def noheaders():
812    """Return an empty mimetools.Message object."""
813    global _noheaders
814    if _noheaders is None:
815        import mimetools
816        try:
817            from cStringIO import StringIO
818        except ImportError:
819            from StringIO import StringIO
820        _noheaders = mimetools.Message(StringIO(), 0)
821        _noheaders.fp.close()   # Recycle file descriptor
822    return _noheaders
823
824
825# Utility classes
826
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Each instance owns one ftplib.FTP connection (self.ftp) that is
    connected to host:port, logged in as user/passwd and changed into
    each directory of dirs in turn.  self.busy is 1 while a transfer
    started by retrfile() has not yet been finished by endtransfer().
    """

    def __init__(self, user, passwd, host, port, dirs):
        """Store the connection parameters and connect via init()."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect: open the connection, log in and cwd through self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file, or a directory listing.

        type is appended to the FTP 'TYPE' command; 'd' or 'D' requests
        a directory listing (sent as 'TYPE A').  A listing is also used
        when file is empty or when RETR is refused with a 550 reply.

        Returns a tuple (fileobj, length): fileobj wraps the data
        connection and calls endtransfer() when closed; length is the
        second item returned by ftplib's ntransfercmd().

        Raises IOError ('ftp error', reason) when the existence check
        fails with a permanent error, or when RETR fails with a
        permanent error other than 550.
        """
        import ftplib
        # Finish any transfer still pending on this connection first.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; re-establish the connection and retry
            # once (a second failure propagates to the caller).
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A 550 reply is not fatal here: conn stays None and the
                # directory-listing fallback below is used instead.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Finish a pending transfer, if any, by reading the final response.

        Does nothing when no transfer is pending; FTP errors raised while
        reading the response are ignored.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Finish any pending transfer and close the FTP connection.

        FTP errors raised while closing are ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
899
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read(), readline() and,
    when available, readlines(), fileno(), __iter__() and next() as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if not hasattr(fp, "fileno"):
            # No descriptor available: provide a fileno() returning None.
            self.fileno = lambda: None
        else:
            self.fileno = fp.fileno
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file, if any."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
928
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has been
    closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the wrapped file, then run (and forget) the close hook."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
943
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
953
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember headers and url for info() and geturl()."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
967
968
969# Utilities to parse URLs (most of these return None for missing parts):
970# unwrap('<URL:type://host/path>') --> 'type://host/path'
971# splittype('type:opaquestring') --> 'type', 'opaquestring'
972# splithost('//host[:port]/path') --> 'host[:port]', '/path'
973# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
974# splitpasswd('user:passwd') -> 'user', 'passwd'
975# splitport('host:port') --> 'host', 'port'
976# splitquery('/path?query') --> '/path', 'query'
977# splittag('/path#tag') --> '/path', 'tag'
978# splitattr('/path;attr1=value1;attr2=value2;...') ->
979#   '/path', ['attr1=value1', 'attr2=value2', ...]
980# splitvalue('attr=value') --> 'attr', 'value'
981# splitgophertype('/Xselector') --> 'X', 'selector'
982# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
984
985try:
986    unicode
987except NameError:
988    def _is_unicode(x):
989        return 0
990else:
991    def _is_unicode(x):
992        return isinstance(x, unicode)
993
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode arguments are returned unchanged.  A unicode URL is
    encoded as ASCII; UnicodeError is raised if that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1005
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing '<...>' pair and a leading
    'URL:' marker are each removed when present.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
1013
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url has no
    leading 'type:' part.
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # found.end() is the position just past the ':' separator.
    return scheme.lower(), url[found.end():]
1027
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#', so a URL without a
    path such as '//host?query' yields 'host' rather than 'host?query'.
    When the remainder is non-empty and does not start with '/', a '/'
    is prepended so that it is always usable as a selector.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _hostprog = re.compile('^//([^/?#]*)(.*)$')

    match = _hostprog.match(url)
    if not match:
        return None, url
    host_port, path = match.group(1, 2)
    if path and path[:1] != '/':
        # '?query' or '#tag' directly after the host: the path is empty.
        path = '/' + path
    return host_port, path
1039
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; both parts are passed through
    unquote().  Returns (None, host) when there is no '@'.
    """
    global _userprog
    if _userprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return map(unquote, found.group(1, 2))
1051
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; passwd is None when there is
    no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
1063
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits; it is None (and host is
    returned unchanged) when host does not end in ':<digits>'.
    """
    global _portprog
    if _portprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
1076
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning a numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None as the port if there is a ':' but no valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    try:
        # int() rejects the empty string as well as non-numeric text.
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
1098
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; query is None when there is
    no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
1110
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; tag is None when there is no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
1122
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    attrs = url.split(';')
    path = attrs.pop(0)
    return path, attrs
1128
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; value is None when there is
    no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily so importing this module does not need re.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
1140
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' followed
    by at least one more character.
    """
    has_type = selector[:1] == '/' and selector[1:2]
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
1146
# Map every two-digit hex string (in all-lower and all-upper case) to
# the character it encodes.
_hextochr = {}
for _code in range(256):
    _hextochr['%02x' % _code] = _hextochr['%02X' % _code] = chr(_code)
del _code

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits listed in _hextochr is
    left in place.
    """
    pieces = s.split('%')
    decoded = [pieces[0]]
    for item in pieces[1:]:
        try:
            decoded.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            decoded.append('%' + item)
        except UnicodeDecodeError:
            decoded.append(unichr(int(item[:2], 16)) + item[2:])
    return "".join(decoded)
1162
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first replaced by a space.
    """
    return unquote(s.replace('+', ' '))
1167
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-character translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    safe by its '%XX' escape.

    Each part of a URL (path, query, ...) has its own set of reserved
    characters that must be quoted.  RFC 2396 "Uniform Resource
    Identifiers (URI): Generic Syntax" lists these reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them.

    The default value of safe suits the path section of a URL: '/' is
    reserved, but in a path the existing slashes are normally used as
    separators and must therefore be left alone.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First use of this set of safe characters: build its table.
        unescaped = safe + always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            if ch in unescaped:
                safe_map[ch] = ch
            else:
                safe_map[ch] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
1206
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unescaped while quoting, then turn them into '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
1213
1214def urlencode(query,doseq=0):
1215    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1216
1217    If any values in the query arg are sequences and doseq is true, each
1218    sequence element is converted to a separate parameter.
1219
1220    If the query arg is a sequence of two-element tuples, the order of the
1221    parameters in the output will match the order of parameters in the
1222    input.
1223    """
1224
1225    if hasattr(query,"items"):
1226        # mapping objects
1227        query = query.items()
1228    else:
1229        # it's a bother at times that strings and string-like objects are
1230        # sequences...
1231        try:
1232            # non-sequence items should not work with len()
1233            # non-empty strings will fail this
1234            if len(query) and not isinstance(query[0], tuple):
1235                raise TypeError
1236            # zero-length sequences of all types will get here and succeed,
1237            # but that's a minor nit - since the original implementation
1238            # allowed empty dicts that type of behavior probably should be
1239            # preserved for consistency
1240        except TypeError:
1241            ty,va,tb = sys.exc_info()
1242            raise TypeError, "not a valid non-string sequence or mapping object", tb
1243
1244    l = []
1245    if not doseq:
1246        # preserve old behavior
1247        for k, v in query:
1248            k = quote_plus(str(k))
1249            v = quote_plus(str(v))
1250            l.append(k + '=' + v)
1251    else:
1252        for k, v in query:
1253            k = quote_plus(str(k))
1254            if isinstance(v, str):
1255                v = quote_plus(v)
1256                l.append(k + '=' + v)
1257            elif _is_unicode(v):
1258                # is there a reasonable way to convert to ASCII?
1259                # encode generates a string, but "replace" or "ignore"
1260                # lose information and "strict" can raise UnicodeError
1261                v = quote_plus(v.encode("ASCII","replace"))
1262                l.append(k + '=' + v)
1263            else:
1264                try:
1265                    # is this a sufficient test for sequence-ness?
1266                    x = len(v)
1267                except TypeError:
1268                    # not a sequence
1269                    v = quote_plus(str(v))
1270                    l.append(k + '=' + v)
1271                else:
1272                    # loop over the sequence
1273                    for elt in v:
1274                        l.append(k + '=' + quote_plus(str(elt)))
1275    return '&'.join(l)
1276
1277# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Every non-empty environment variable whose lower-cased name ends in
    '_proxy' contributes one entry, keyed by the part of the name in
    front of '_proxy'.  If you need a different way, you can pass a
    proxies dictionary to the [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if not value or not name.endswith('_proxy'):
            continue
        scheme = name[:-6]
        proxies[scheme] = value
    return proxies
1293
1294if sys.platform == 'darwin':
1295    def getproxies_internetconfig():
1296        """Return a dictionary of scheme -> proxy server URL mappings.
1297
1298        By convention the mac uses Internet Config to store
1299        proxies.  An HTTP proxy, for instance, is stored under
1300        the HttpProxy key.
1301
1302        """
1303        try:
1304            import ic
1305        except ImportError:
1306            return {}
1307
1308        try:
1309            config = ic.IC()
1310        except ic.error:
1311            return {}
1312        proxies = {}
1313        # HTTP:
1314        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1315            try:
1316                value = config['HTTPProxyHost']
1317            except ic.error:
1318                pass
1319            else:
1320                proxies['http'] = 'http://%s' % value
1321        # FTP: XXXX To be done.
1322        # Gopher: XXXX To be done.
1323        return proxies
1324
1325    def proxy_bypass(x):
1326        return 0
1327
1328    def getproxies():
1329        return getproxies_environment() or getproxies_internetconfig()
1330
1331elif os.name == 'nt':
1332    def getproxies_registry():
1333        """Return a dictionary of scheme -> proxy server URL mappings.
1334
1335        Win32 uses the registry to store proxies.
1336
1337        """
1338        proxies = {}
1339        try:
1340            import _winreg
1341        except ImportError:
1342            # Std module, so should be around - but you never know!
1343            return proxies
1344        try:
1345            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1346                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1347            proxyEnable = _winreg.QueryValueEx(internetSettings,
1348                                               'ProxyEnable')[0]
1349            if proxyEnable:
1350                # Returned as Unicode but problems if not converted to ASCII
1351                proxyServer = str(_winreg.QueryValueEx(internetSettings,
1352                                                       'ProxyServer')[0])
1353                if '=' in proxyServer:
1354                    # Per-protocol settings
1355                    for p in proxyServer.split(';'):
1356                        protocol, address = p.split('=', 1)
1357                        # See if address has a type:// prefix
1358                        import re
1359                        if not re.match('^([^/:]+)://', address):
1360                            address = '%s://%s' % (protocol, address)
1361                        proxies[protocol] = address
1362                else:
1363                    # Use one setting for all protocols
1364                    if proxyServer[:5] == 'http:':
1365                        proxies['http'] = proxyServer
1366                    else:
1367                        proxies['http'] = 'http://%s' % proxyServer
1368                        proxies['ftp'] = 'ftp://%s' % proxyServer
1369            internetSettings.Close()
1370        except (WindowsError, ValueError, TypeError):
1371            # Either registry key not found etc, or the value in an
1372            # unexpected format.
1373            # proxies already set up to be empty so nothing to do
1374            pass
1375        return proxies
1376
1377    def getproxies():
1378        """Return a dictionary of scheme -> proxy server URL mappings.
1379
1380        Returns settings gathered from the environment, if specified,
1381        or the registry.
1382
1383        """
1384        return getproxies_environment() or getproxies_registry()
1385
1386    def proxy_bypass(host):
1387        try:
1388            import _winreg
1389            import re
1390        except ImportError:
1391            # Std modules, so should be around - but you never know!
1392            return 0
1393        try:
1394            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1395                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1396            proxyEnable = _winreg.QueryValueEx(internetSettings,
1397                                               'ProxyEnable')[0]
1398            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1399                                                     'ProxyOverride')[0])
1400            # ^^^^ Returned as Unicode but problems if not converted to ASCII
1401        except WindowsError:
1402            return 0
1403        if not proxyEnable or not proxyOverride:
1404            return 0
1405        # try to make a host list from name and IP address.
1406        host = [host]
1407        try:
1408            addr = socket.gethostbyname(host[0])
1409            if addr != host:
1410                host.append(addr)
1411        except socket.error:
1412            pass
1413        # make a check value list from the registry entry: replace the
1414        # '<local>' string by the localhost entry and the corresponding
1415        # canonical entry.
1416        proxyOverride = proxyOverride.split(';')
1417        i = 0
1418        while i < len(proxyOverride):
1419            if proxyOverride[i] == '<local>':
1420                proxyOverride[i:i+1] = ['localhost',
1421                                        '127.0.0.1',
1422                                        socket.gethostname(),
1423                                        socket.gethostbyname(
1424                                            socket.gethostname())]
1425            i += 1
1426        # print proxyOverride
1427        # now check if we match one of the registry values.
1428        for test in proxyOverride:
1429            test = test.replace(".", r"\.")     # mask dots
1430            test = test.replace("*", r".*")     # change glob sequence
1431            test = test.replace("?", r".")      # change glob char
1432            for val in host:
1433                # print "%s <--> %s" %( test, val )
1434                if re.match(test, val, re.I):
1435                    return 1
1436        return 0
1437
1438else:
    # No platform-specific proxy store is consulted on other platforms:
    # getproxies() is simply getproxies_environment(), i.e. only the
    # <scheme>_proxy environment variables are used.
    getproxies = getproxies_environment
1441
1442    def proxy_bypass(host):
1443        return 0
1444
1445# Test and time quote() and unquote()
1446def test1():
1447    s = ''
1448    for i in range(256): s = s + chr(i)
1449    s = s*4
1450    t0 = time.time()
1451    qs = quote(s)
1452    uqs = unquote(qs)
1453    t1 = time.time()
1454    if uqs != s:
1455        print 'Wrong!'
1456    print repr(s)
1457    print repr(qs)
1458    print repr(uqs)
1459    print round(t1 - t0, 3), 'sec'
1460
1461
1462def reporthook(blocknum, blocksize, totalsize):
1463    # Report during remote transfers
1464    print "Block number: %d, Block size: %d, Total size: %d" % (
1465        blocknum, blocksize, totalsize)
1466
1467# Test program
1468def test(args=[]):
1469    if not args:
1470        args = [
1471            '/etc/passwd',
1472            'file:/etc/passwd',
1473            'file://localhost/etc/passwd',
1474            'ftp://ftp.python.org/pub/python/README',
1475##          'gopher://gopher.micro.umn.edu/1/',
1476            'http://www.python.org/index.html',
1477            ]
1478        if hasattr(URLopener, "open_https"):
1479            args.append('https://synergy.as.cmu.edu/~geek/')
1480    try:
1481        for url in args:
1482            print '-'*10, url, '-'*10
1483            fn, h = urlretrieve(url, None, reporthook)
1484            print fn
1485            if h:
1486                print '======'
1487                for k in h.keys(): print k + ':', h[k]
1488                print '======'
1489            fp = open(fn, 'rb')
1490            data = fp.read()
1491            del fp
1492            if '\r' in data:
1493                table = string.maketrans("", "")
1494                data = data.translate(table, "\r")
1495            print data
1496            fn, h = None, None
1497        print '-'*40
1498    finally:
1499        urlcleanup()
1500
1501def main():
1502    import getopt, sys
1503    try:
1504        opts, args = getopt.getopt(sys.argv[1:], "th")
1505    except getopt.error, msg:
1506        print msg
1507        print "Use -h for help"
1508        return
1509    t = 0
1510    for o, a in opts:
1511        if o == '-t':
1512            t = t + 1
1513        if o == '-h':
1514            print "Usage: python urllib.py [-t] [url ...]"
1515            print "-t runs self-test;",
1516            print "otherwise, contents of urls are printed"
1517            return
1518    if t:
1519        if t > 1:
1520            test1()
1521        test(args)
1522    else:
1523        if not args:
1524            print "Use -h for help"
1525        for url in args:
1526            print urlopen(url).read(),
1527
1528# Run test program when run as a script
1529if __name__ == '__main__':
1530    main()
1531