urllib.py revision 5a096e1b100603f5537eca5124be17abacf17743
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738: the "URL standard". (authoritative status)
14- RFC1630: the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30from urlparse import urljoin as basejoin
31
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34           "urlencode", "url2pathname", "pathname2url", "splittag",
35           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37           "splitnport", "splitquery", "splitattr", "splitvalue",
38           "splitgophertype", "getproxies"]
39
40__version__ = '1.17'    # XXX This version is not always updated :-(
41
42MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43
44# Helper for non-unix systems
45if os.name == 'mac':
46    from macurl2path import url2pathname, pathname2url
47elif os.name == 'nt':
48    from nturl2path import url2pathname, pathname2url
49elif os.name == 'riscos':
50    from rourl2path import url2pathname, pathname2url
51else:
52    def url2pathname(pathname):
53        """OS-specific conversion from a relative URL of the 'file' scheme
54        to a file system path; not recommended for general use."""
55        return unquote(pathname)
56
57    def pathname2url(pathname):
58        """OS-specific conversion from a file system path to a relative URL
59        of the 'file' scheme; not recommended for general use."""
60        return quote(pathname)
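    # Illustrative behaviour of these fallback versions:
    #     pathname2url('/tmp/a b')   -> '/tmp/a%20b'
    #     url2pathname('/tmp/a%20b') -> '/tmp/a b'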
61
62# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64#     (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
70_urlopener = None
71def urlopen(url, data=None, proxies=None):
72    """urlopen(url [, data]) -> open file-like object"""
73    global _urlopener
74    if proxies is not None:
75        opener = FancyURLopener(proxies=proxies)
76    elif not _urlopener:
77        opener = FancyURLopener()
78        _urlopener = opener
79    else:
80        opener = _urlopener
81    if data is None:
82        return opener.open(url)
83    else:
84        return opener.open(url, data)
85def urlretrieve(url, filename=None, reporthook=None, data=None):
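    """Retrieve url into a local file (a temporary file unless filename is
    given) and return a (filename, headers) tuple."""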
86    global _urlopener
87    if not _urlopener:
88        _urlopener = FancyURLopener()
89    return _urlopener.retrieve(url, filename, reporthook, data)
90def urlcleanup():
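    """Remove temporary files and cached entries left behind by earlier
    urlretrieve() calls."""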
91    if _urlopener:
92        _urlopener.cleanup()
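# Example use of the shortcuts above (a sketch; the URL and the hook are
# illustrative):
#
#     def hook(blocknum, blocksize, totalsize):
#         print "read %d of %d bytes" % (blocknum * blocksize, totalsize)
#     filename, headers = urlretrieve('http://www.python.org/', reporthook=hook)
#     urlcleanup()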
93
94# exception raised when downloaded size does not match content-length
95class ContentTooShortError(IOError):
96    def __init__(self, message, content):
97        IOError.__init__(self, message)
98        self.content = content
99
100ftpcache = {}
101class URLopener:
102    """Class to open URLs.
103    This is a class rather than just a subroutine because we may need
104    more than one set of global protocol-specific options.
105    Note -- this is a base class for those who don't want the
106    automatic handling of error types 302 (relocated) and 401
107    (authorization needed)."""
108
109    __tempfiles = None
110
111    version = "Python-urllib/%s" % __version__
112
113    # Constructor
114    def __init__(self, proxies=None, **x509):
115        if proxies is None:
116            proxies = getproxies()
117        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
118        self.proxies = proxies
119        self.key_file = x509.get('key_file')
120        self.cert_file = x509.get('cert_file')
121        self.addheaders = [('User-Agent', self.version)]
122        self.__tempfiles = []
123        self.__unlink = os.unlink # See cleanup()
124        self.tempcache = None
125        # Undocumented feature: if you assign {} to tempcache,
126        # it is used to cache files retrieved with
127        # self.retrieve().  This is not enabled by default
128        # since it does not work for changing documents (and I
129        # haven't got the logic to check expiration headers
130        # yet).
131        self.ftpcache = ftpcache
132        # Undocumented feature: you can use a different
133        # ftp cache by assigning to the .ftpcache member;
134        # in case you want logically independent URL openers
135        # XXX This is not threadsafe.  Bah.
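        # Example (illustrative) of enabling the tempcache feature described
        # above on a single opener instance:
        #     opener = URLopener()
        #     opener.tempcache = {}    # retrieve() results are now cached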
136
137    def __del__(self):
138        self.close()
139
140    def close(self):
141        self.cleanup()
142
143    def cleanup(self):
144        # This code sometimes runs when the rest of this module
145        # has already been deleted, so it can't use any globals
146        # or import anything.
147        if self.__tempfiles:
148            for file in self.__tempfiles:
149                try:
150                    self.__unlink(file)
151                except OSError:
152                    pass
153            del self.__tempfiles[:]
154        if self.tempcache:
155            self.tempcache.clear()
156
157    def addheader(self, *args):
158        """Add a header to be used by the HTTP interface only
159        e.g. u.addheader('Accept', 'sound/basic')"""
160        self.addheaders.append(args)
161
162    # External interface
163    def open(self, fullurl, data=None):
164        """Use URLopener().open(file) instead of open(file, 'r')."""
165        fullurl = unwrap(toBytes(fullurl))
166        if self.tempcache and fullurl in self.tempcache:
167            filename, headers = self.tempcache[fullurl]
168            fp = open(filename, 'rb')
169            return addinfourl(fp, headers, fullurl)
170        urltype, url = splittype(fullurl)
171        if not urltype:
172            urltype = 'file'
173        if urltype in self.proxies:
174            proxy = self.proxies[urltype]
175            urltype, proxyhost = splittype(proxy)
176            host, selector = splithost(proxyhost)
177            url = (host, fullurl) # Signal special case to open_*()
178        else:
179            proxy = None
180        name = 'open_' + urltype
181        self.type = urltype
182        name = name.replace('-', '_')
183        if not hasattr(self, name):
184            if proxy:
185                return self.open_unknown_proxy(proxy, fullurl, data)
186            else:
187                return self.open_unknown(fullurl, data)
188        try:
189            if data is None:
190                return getattr(self, name)(url)
191            else:
192                return getattr(self, name)(url, data)
193        except socket.error, msg:
194            raise IOError, ('socket error', msg), sys.exc_info()[2]
195
196    def open_unknown(self, fullurl, data=None):
197        """Overridable interface to open unknown URL type."""
198        type, url = splittype(fullurl)
199        raise IOError, ('url error', 'unknown url type', type)
200
201    def open_unknown_proxy(self, proxy, fullurl, data=None):
202        """Overridable interface to open unknown URL type."""
203        type, url = splittype(fullurl)
204        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
205
206    # External interface
207    def retrieve(self, url, filename=None, reporthook=None, data=None):
208        """retrieve(url) returns (filename, headers) for a local object
209        or (tempfilename, headers) for a remote object."""
210        url = unwrap(toBytes(url))
211        if self.tempcache and url in self.tempcache:
212            return self.tempcache[url]
213        type, url1 = splittype(url)
214        if filename is None and (not type or type == 'file'):
215            try:
216                fp = self.open_local_file(url1)
217                hdrs = fp.info()
218                del fp
219                return url2pathname(splithost(url1)[1]), hdrs
220            except IOError, msg:
221                pass
222        fp = self.open(url, data)
223        headers = fp.info()
224        if filename:
225            tfp = open(filename, 'wb')
226        else:
227            import tempfile
228            garbage, path = splittype(url)
229            garbage, path = splithost(path or "")
230            path, garbage = splitquery(path or "")
231            path, garbage = splitattr(path or "")
232            suffix = os.path.splitext(path)[1]
233            (fd, filename) = tempfile.mkstemp(suffix)
234            self.__tempfiles.append(filename)
235            tfp = os.fdopen(fd, 'wb')
236        result = filename, headers
237        if self.tempcache is not None:
238            self.tempcache[url] = result
239        bs = 1024*8
240        size = -1
241        read = 0
242        blocknum = 0
243        if reporthook:
244            if "content-length" in headers:
245                size = int(headers["Content-Length"])
246            reporthook(blocknum, bs, size)
247        while 1:
248            block = fp.read(bs)
249            if block == "":
250                break
251            read += len(block)
252            tfp.write(block)
253            blocknum += 1
254            if reporthook:
255                reporthook(blocknum, bs, size)
256        fp.close()
257        tfp.close()
258        del fp
259        del tfp
260
261        # raise exception if actual size does not match content-length header
262        if size >= 0 and read < size:
263            raise ContentTooShortError("retrieval incomplete: got only %i out "
264                                       "of %i bytes" % (read, size), result)
265
266        return result
267
268    # Each method named open_<type> knows how to open that type of URL
269
270    def open_http(self, url, data=None):
271        """Use HTTP protocol."""
272        import httplib
273        user_passwd = None
274        proxy_passwd = None
275        if isinstance(url, str):
276            host, selector = splithost(url)
277            if host:
278                user_passwd, host = splituser(host)
279                host = unquote(host)
280            realhost = host
281        else:
282            host, selector = url
283            # check whether the proxy contains authorization information
284            proxy_passwd, host = splituser(host)
285            # now we proceed with the url we want to obtain
286            urltype, rest = splittype(selector)
287            url = rest
288            user_passwd = None
289            if urltype.lower() != 'http':
290                realhost = None
291            else:
292                realhost, rest = splithost(rest)
293                if realhost:
294                    user_passwd, realhost = splituser(realhost)
295                if user_passwd:
296                    selector = "%s://%s%s" % (urltype, realhost, rest)
297                if proxy_bypass(realhost):
298                    host = realhost
299
300            #print "proxy via http:", host, selector
301        if not host: raise IOError, ('http error', 'no host given')
302
303        if proxy_passwd:
304            import base64
305            proxy_auth = base64.b64encode(proxy_passwd).strip()
306        else:
307            proxy_auth = None
308
309        if user_passwd:
310            import base64
311            auth = base64.b64encode(user_passwd).strip()
312        else:
313            auth = None
314        h = httplib.HTTP(host)
315        if data is not None:
316            h.putrequest('POST', selector)
317            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
318            h.putheader('Content-Length', '%d' % len(data))
319        else:
320            h.putrequest('GET', selector)
321        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
322        if auth: h.putheader('Authorization', 'Basic %s' % auth)
323        if realhost: h.putheader('Host', realhost)
324        for args in self.addheaders: h.putheader(*args)
325        h.endheaders()
326        if data is not None:
327            h.send(data)
328        errcode, errmsg, headers = h.getreply()
329        fp = h.getfile()
330        if errcode == 200:
331            return addinfourl(fp, headers, "http:" + url)
332        else:
333            if data is None:
334                return self.http_error(url, fp, errcode, errmsg, headers)
335            else:
336                return self.http_error(url, fp, errcode, errmsg, headers, data)
337
338    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
339        """Handle http errors.
340        Derived classes can override this, or provide specific handlers
341        named http_error_DDD where DDD is the 3-digit error code."""
342        # First check if there's a specific handler for this error
343        name = 'http_error_%d' % errcode
344        if hasattr(self, name):
345            method = getattr(self, name)
346            if data is None:
347                result = method(url, fp, errcode, errmsg, headers)
348            else:
349                result = method(url, fp, errcode, errmsg, headers, data)
350            if result: return result
351        return self.http_error_default(url, fp, errcode, errmsg, headers)
352
353    def http_error_default(self, url, fp, errcode, errmsg, headers):
354        """Default error handler: close the connection and raise IOError."""
355        void = fp.read()
356        fp.close()
357        raise IOError, ('http error', errcode, errmsg, headers)
358
359    if hasattr(socket, "ssl"):
360        def open_https(self, url, data=None):
361            """Use HTTPS protocol."""
362            import httplib
363            user_passwd = None
364            proxy_passwd = None
365            if isinstance(url, str):
366                host, selector = splithost(url)
367                if host:
368                    user_passwd, host = splituser(host)
369                    host = unquote(host)
370                realhost = host
371            else:
372                host, selector = url
373                # check whether the proxy contains authorization information
374                proxy_passwd, host = splituser(host)
375                urltype, rest = splittype(selector)
376                url = rest
377                user_passwd = None
378                if urltype.lower() != 'https':
379                    realhost = None
380                else:
381                    realhost, rest = splithost(rest)
382                    if realhost:
383                        user_passwd, realhost = splituser(realhost)
384                    if user_passwd:
385                        selector = "%s://%s%s" % (urltype, realhost, rest)
386                #print "proxy via https:", host, selector
387            if not host: raise IOError, ('https error', 'no host given')
388            if proxy_passwd:
389                import base64
390                proxy_auth = base64.b64encode(proxy_passwd).strip()
391            else:
392                proxy_auth = None
393            if user_passwd:
394                import base64
395                auth = base64.b64encode(user_passwd).strip()
396            else:
397                auth = None
398            h = httplib.HTTPS(host, 0,
399                              key_file=self.key_file,
400                              cert_file=self.cert_file)
401            if data is not None:
402                h.putrequest('POST', selector)
403                h.putheader('Content-Type',
404                            'application/x-www-form-urlencoded')
405                h.putheader('Content-Length', '%d' % len(data))
406            else:
407                h.putrequest('GET', selector)
408            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
409            if auth: h.putheader('Authorization', 'Basic %s' % auth)
410            if realhost: h.putheader('Host', realhost)
411            for args in self.addheaders: h.putheader(*args)
412            h.endheaders()
413            if data is not None:
414                h.send(data)
415            errcode, errmsg, headers = h.getreply()
416            fp = h.getfile()
417            if errcode == 200:
418                return addinfourl(fp, headers, "https:" + url)
419            else:
420                if data is None:
421                    return self.http_error(url, fp, errcode, errmsg, headers)
422                else:
423                    return self.http_error(url, fp, errcode, errmsg, headers,
424                                           data)
425
426    def open_gopher(self, url):
427        """Use Gopher protocol."""
428        if not isinstance(url, str):
429            raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
430        import gopherlib
431        host, selector = splithost(url)
432        if not host: raise IOError, ('gopher error', 'no host given')
433        host = unquote(host)
434        type, selector = splitgophertype(selector)
435        selector, query = splitquery(selector)
436        selector = unquote(selector)
437        if query:
438            query = unquote(query)
439            fp = gopherlib.send_query(selector, query, host)
440        else:
441            fp = gopherlib.send_selector(selector, host)
442        return addinfourl(fp, noheaders(), "gopher:" + url)
443
444    def open_file(self, url):
445        """Use local file or FTP depending on form of URL."""
446        if not isinstance(url, str):
447            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
448        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
449            return self.open_ftp(url)
450        else:
451            return self.open_local_file(url)
452
453    def open_local_file(self, url):
454        """Use local file."""
455        import mimetypes, mimetools, email.utils
456        try:
457            from cStringIO import StringIO
458        except ImportError:
459            from StringIO import StringIO
460        host, file = splithost(url)
461        localname = url2pathname(file)
462        try:
463            stats = os.stat(localname)
464        except OSError, e:
465            raise IOError(e.errno, e.strerror, e.filename)
466        size = stats.st_size
467        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
468        mtype = mimetypes.guess_type(url)[0]
469        headers = mimetools.Message(StringIO(
470            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
471            (mtype or 'text/plain', size, modified)))
472        if not host:
473            urlfile = file
474            if file[:1] == '/':
475                urlfile = 'file://' + file
476            return addinfourl(open(localname, 'rb'),
477                              headers, urlfile)
478        host, port = splitport(host)
479        if not port \
480           and socket.gethostbyname(host) in (localhost(), thishost()):
481            urlfile = file
482            if file[:1] == '/':
483                urlfile = 'file://' + file
484            return addinfourl(open(localname, 'rb'),
485                              headers, urlfile)
486        raise IOError, ('local file error', 'not on local host')
487
488    def open_ftp(self, url):
489        """Use FTP protocol."""
490        if not isinstance(url, str):
491            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
492        import mimetypes, mimetools
493        try:
494            from cStringIO import StringIO
495        except ImportError:
496            from StringIO import StringIO
497        host, path = splithost(url)
498        if not host: raise IOError, ('ftp error', 'no host given')
499        host, port = splitport(host)
500        user, host = splituser(host)
501        if user: user, passwd = splitpasswd(user)
502        else: passwd = None
503        host = unquote(host)
504        user = unquote(user or '')
505        passwd = unquote(passwd or '')
506        host = socket.gethostbyname(host)
507        if not port:
508            import ftplib
509            port = ftplib.FTP_PORT
510        else:
511            port = int(port)
512        path, attrs = splitattr(path)
513        path = unquote(path)
514        dirs = path.split('/')
515        dirs, file = dirs[:-1], dirs[-1]
516        if dirs and not dirs[0]: dirs = dirs[1:]
517        if dirs and not dirs[0]: dirs[0] = '/'
518        key = user, host, port, '/'.join(dirs)
519        # XXX thread unsafe!
520        if len(self.ftpcache) > MAXFTPCACHE:
521            # Prune the cache, rather arbitrarily
522            for k in self.ftpcache.keys():
523                if k != key:
524                    v = self.ftpcache[k]
525                    del self.ftpcache[k]
526                    v.close()
527        try:
528            if not key in self.ftpcache:
529                self.ftpcache[key] = \
530                    ftpwrapper(user, passwd, host, port, dirs)
531            if not file: type = 'D'
532            else: type = 'I'
533            for attr in attrs:
534                attr, value = splitvalue(attr)
535                if attr.lower() == 'type' and \
536                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
537                    type = value.upper()
538            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
539            mtype = mimetypes.guess_type("ftp:" + url)[0]
540            headers = ""
541            if mtype:
542                headers += "Content-Type: %s\n" % mtype
543            if retrlen is not None and retrlen >= 0:
544                headers += "Content-Length: %d\n" % retrlen
545            headers = mimetools.Message(StringIO(headers))
546            return addinfourl(fp, headers, "ftp:" + url)
547        except ftperrors(), msg:
548            raise IOError, ('ftp error', msg), sys.exc_info()[2]
549
550    def open_data(self, url, data=None):
551        """Use "data" URL."""
552        if not isinstance(url, str):
553            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
554        # ignore POSTed data
555        #
556        # syntax of data URLs:
557        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
558        # mediatype := [ type "/" subtype ] *( ";" parameter )
559        # data      := *urlchar
560        # parameter := attribute "=" value
561        import mimetools
562        try:
563            from cStringIO import StringIO
564        except ImportError:
565            from StringIO import StringIO
566        try:
567            [type, data] = url.split(',', 1)
568        except ValueError:
569            raise IOError, ('data error', 'bad data URL')
570        if not type:
571            type = 'text/plain;charset=US-ASCII'
572        semi = type.rfind(';')
573        if semi >= 0 and '=' not in type[semi:]:
574            encoding = type[semi+1:]
575            type = type[:semi]
576        else:
577            encoding = ''
578        msg = []
579        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
580                                            time.gmtime(time.time())))
581        msg.append('Content-type: %s' % type)
582        if encoding == 'base64':
583            import base64
584            data = base64.decodestring(data)
585        else:
586            data = unquote(data)
587        msg.append('Content-Length: %d' % len(data))
588        msg.append('')
589        msg.append(data)
590        msg = '\n'.join(msg)
591        f = StringIO(msg)
592        headers = mimetools.Message(f, 0)
593        #f.fileno = None     # needed for addinfourl
594        return addinfourl(f, headers, url)
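    # Illustrative data URL accepted by open_data(); the payload decodes to
    # 'Hello, world.':
    #     urlopen('data:text/plain;base64,SGVsbG8sIHdvcmxkLg==')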
595
596
597class FancyURLopener(URLopener):
598    """Derived class with handlers for errors we can handle (perhaps)."""
599
600    def __init__(self, *args, **kwargs):
601        URLopener.__init__(self, *args, **kwargs)
602        self.auth_cache = {}
603        self.tries = 0
604        self.maxtries = 10
605
606    def http_error_default(self, url, fp, errcode, errmsg, headers):
607        """Default error handling -- don't raise an exception."""
608        return addinfourl(fp, headers, "http:" + url)
609
610    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
611        """Error 302 -- relocated (temporarily)."""
612        self.tries += 1
613        if self.maxtries and self.tries >= self.maxtries:
614            if hasattr(self, "http_error_500"):
615                meth = self.http_error_500
616            else:
617                meth = self.http_error_default
618            self.tries = 0
619            return meth(url, fp, 500,
620                        "Internal Server Error: Redirect Recursion", headers)
621        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
622                                        data)
623        self.tries = 0
624        return result
625
626    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
627        if 'location' in headers:
628            newurl = headers['location']
629        elif 'uri' in headers:
630            newurl = headers['uri']
631        else:
632            return
633        void = fp.read()
634        fp.close()
635        # In case the server sent a relative URL, join with original:
636        newurl = basejoin(self.type + ":" + url, newurl)
637        return self.open(newurl)
638
639    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
640        """Error 301 -- also relocated (permanently)."""
641        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
642
643    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
644        """Error 303 -- also relocated (essentially identical to 302)."""
645        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
646
647    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
648        """Error 307 -- relocated, but turn POST into error."""
649        if data is None:
650            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
651        else:
652            return self.http_error_default(url, fp, errcode, errmsg, headers)
653
654    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
655        """Error 401 -- authentication required.
656        This function supports Basic authentication only."""
657        if not 'www-authenticate' in headers:
658            URLopener.http_error_default(self, url, fp,
659                                         errcode, errmsg, headers)
660        stuff = headers['www-authenticate']
661        import re
662        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
663        if not match:
664            URLopener.http_error_default(self, url, fp,
665                                         errcode, errmsg, headers)
666        scheme, realm = match.groups()
667        if scheme.lower() != 'basic':
668            URLopener.http_error_default(self, url, fp,
669                                         errcode, errmsg, headers)
670        name = 'retry_' + self.type + '_basic_auth'
671        if data is None:
672            return getattr(self,name)(url, realm)
673        else:
674            return getattr(self,name)(url, realm, data)
675
676    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
677        """Error 407 -- proxy authentication required.
678        This function supports Basic authentication only."""
679        if not 'proxy-authenticate' in headers:
680            URLopener.http_error_default(self, url, fp,
681                                         errcode, errmsg, headers)
682        stuff = headers['proxy-authenticate']
683        import re
684        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
685        if not match:
686            URLopener.http_error_default(self, url, fp,
687                                         errcode, errmsg, headers)
688        scheme, realm = match.groups()
689        if scheme.lower() != 'basic':
690            URLopener.http_error_default(self, url, fp,
691                                         errcode, errmsg, headers)
692        name = 'retry_proxy_' + self.type + '_basic_auth'
693        if data is None:
694            return getattr(self,name)(url, realm)
695        else:
696            return getattr(self,name)(url, realm, data)
697
698    def retry_proxy_http_basic_auth(self, url, realm, data=None):
699        host, selector = splithost(url)
700        newurl = 'http://' + host + selector
701        proxy = self.proxies['http']
702        urltype, proxyhost = splittype(proxy)
703        proxyhost, proxyselector = splithost(proxyhost)
704        i = proxyhost.find('@') + 1
705        proxyhost = proxyhost[i:]
706        user, passwd = self.get_user_passwd(proxyhost, realm, i)
707        if not (user or passwd): return None
708        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
709        self.proxies['http'] = 'http://' + proxyhost + proxyselector
710        if data is None:
711            return self.open(newurl)
712        else:
713            return self.open(newurl, data)
714
715    def retry_proxy_https_basic_auth(self, url, realm, data=None):
716        host, selector = splithost(url)
717        newurl = 'https://' + host + selector
718        proxy = self.proxies['https']
719        urltype, proxyhost = splittype(proxy)
720        proxyhost, proxyselector = splithost(proxyhost)
721        i = proxyhost.find('@') + 1
722        proxyhost = proxyhost[i:]
723        user, passwd = self.get_user_passwd(proxyhost, realm, i)
724        if not (user or passwd): return None
725        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
726        self.proxies['https'] = 'https://' + proxyhost + proxyselector
727        if data is None:
728            return self.open(newurl)
729        else:
730            return self.open(newurl, data)
731
732    def retry_http_basic_auth(self, url, realm, data=None):
733        host, selector = splithost(url)
734        i = host.find('@') + 1
735        host = host[i:]
736        user, passwd = self.get_user_passwd(host, realm, i)
737        if not (user or passwd): return None
738        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
739        newurl = 'http://' + host + selector
740        if data is None:
741            return self.open(newurl)
742        else:
743            return self.open(newurl, data)
744
745    def retry_https_basic_auth(self, url, realm, data=None):
746        host, selector = splithost(url)
747        i = host.find('@') + 1
748        host = host[i:]
749        user, passwd = self.get_user_passwd(host, realm, i)
750        if not (user or passwd): return None
751        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
752        newurl = 'https://' + host + selector
753        if data is None:
754            return self.open(newurl)
755        else:
756            return self.open(newurl, data)
757
758    def get_user_passwd(self, host, realm, clear_cache = 0):
759        key = realm + '@' + host.lower()
760        if key in self.auth_cache:
761            if clear_cache:
762                del self.auth_cache[key]
763            else:
764                return self.auth_cache[key]
765        user, passwd = self.prompt_user_passwd(host, realm)
766        if user or passwd: self.auth_cache[key] = (user, passwd)
767        return user, passwd
768
769    def prompt_user_passwd(self, host, realm):
770        """Override this in a GUI environment!"""
771        import getpass
772        try:
773            user = raw_input("Enter username for %s at %s: " % (realm,
774                                                                host))
775            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
776                (user, realm, host))
777            return user, passwd
778        except KeyboardInterrupt:
779            print
780            return None, None
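    # Sketch of overriding prompt_user_passwd() in a subclass, as suggested
    # above (class name and credentials are illustrative):
    #     class MyOpener(FancyURLopener):
    #         def prompt_user_passwd(self, host, realm):
    #             return 'user', 'secret'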
781
782
783# Utility functions
784
785_localhost = None
786def localhost():
787    """Return the IP address of the magic hostname 'localhost'."""
788    global _localhost
789    if _localhost is None:
790        _localhost = socket.gethostbyname('localhost')
791    return _localhost
792
793_thishost = None
794def thishost():
795    """Return the IP address of the current host."""
796    global _thishost
797    if _thishost is None:
798        _thishost = socket.gethostbyname(socket.gethostname())
799    return _thishost
800
801_ftperrors = None
802def ftperrors():
803    """Return the set of errors raised by the FTP class."""
804    global _ftperrors
805    if _ftperrors is None:
806        import ftplib
807        _ftperrors = ftplib.all_errors
808    return _ftperrors
809
810_noheaders = None
811def noheaders():
812    """Return an empty mimetools.Message object."""
813    global _noheaders
814    if _noheaders is None:
815        import mimetools
816        try:
817            from cStringIO import StringIO
818        except ImportError:
819            from StringIO import StringIO
820        _noheaders = mimetools.Message(StringIO(), 0)
821        _noheaders.fp.close()   # Recycle file descriptor
822    return _noheaders
823
824
825# Utility classes
826
827class ftpwrapper:
828    """Class used by open_ftp() for cache of open FTP connections."""
829
830    def __init__(self, user, passwd, host, port, dirs):
831        self.user = user
832        self.passwd = passwd
833        self.host = host
834        self.port = port
835        self.dirs = dirs
836        self.init()
837
838    def init(self):
839        import ftplib
840        self.busy = 0
841        self.ftp = ftplib.FTP()
842        self.ftp.connect(self.host, self.port)
843        self.ftp.login(self.user, self.passwd)
844        for dir in self.dirs:
845            self.ftp.cwd(dir)
846
847    def retrfile(self, file, type):
848        import ftplib
849        self.endtransfer()
850        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
851        else: cmd = 'TYPE ' + type; isdir = 0
852        try:
853            self.ftp.voidcmd(cmd)
854        except ftplib.all_errors:
855            self.init()
856            self.ftp.voidcmd(cmd)
857        conn = None
858        if file and not isdir:
859            # Try to retrieve as a file
860            try:
861                cmd = 'RETR ' + file
862                conn = self.ftp.ntransfercmd(cmd)
863            except ftplib.error_perm, reason:
864                if str(reason)[:3] != '550':
865                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
866        if not conn:
867            # Set transfer mode to ASCII!
868            self.ftp.voidcmd('TYPE A')
869            # Try a directory listing
870            if file: cmd = 'LIST ' + file
871            else: cmd = 'LIST'
872            conn = self.ftp.ntransfercmd(cmd)
873        self.busy = 1
874        # Pass back both a suitably decorated object and a retrieval length
875        return (addclosehook(conn[0].makefile('rb'),
876                             self.endtransfer), conn[1])
877    def endtransfer(self):
878        if not self.busy:
879            return
880        self.busy = 0
881        try:
882            self.ftp.voidresp()
883        except ftperrors():
884            pass
885
886    def close(self):
887        self.endtransfer()
888        try:
889            self.ftp.close()
890        except ftperrors():
891            pass
892
893class addbase:
894    """Base class for addinfo and addclosehook."""
895
896    def __init__(self, fp):
897        self.fp = fp
898        self.read = self.fp.read
899        self.readline = self.fp.readline
900        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
901        if hasattr(self.fp, "fileno"):
902            self.fileno = self.fp.fileno
903        else:
904            self.fileno = lambda: None
905        if hasattr(self.fp, "__iter__"):
906            self.__iter__ = self.fp.__iter__
907            if hasattr(self.fp, "next"):
908                self.next = self.fp.next
909
910    def __repr__(self):
911        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
912                                             id(self), self.fp)
913
914    def close(self):
915        self.read = None
916        self.readline = None
917        self.readlines = None
918        self.fileno = None
919        if self.fp: self.fp.close()
920        self.fp = None
921
922class addclosehook(addbase):
923    """Class to add a close hook to an open file."""
924
925    def __init__(self, fp, closehook, *hookargs):
926        addbase.__init__(self, fp)
927        self.closehook = closehook
928        self.hookargs = hookargs
929
930    def close(self):
931        addbase.close(self)
932        if self.closehook:
933            self.closehook(*self.hookargs)
934            self.closehook = None
935            self.hookargs = None
936
937class addinfo(addbase):
938    """class to add an info() method to an open file."""
939
940    def __init__(self, fp, headers):
941        addbase.__init__(self, fp)
942        self.headers = headers
943
944    def info(self):
945        return self.headers
946
947class addinfourl(addbase):
948    """class to add info() and geturl() methods to an open file."""
949
950    def __init__(self, fp, headers, url):
951        addbase.__init__(self, fp)
952        self.headers = headers
953        self.url = url
954
955    def info(self):
956        return self.headers
957
958    def geturl(self):
959        return self.url
960
961
962# Utilities to parse URLs (most of these return None for missing parts):
963# unwrap('<URL:type://host/path>') --> 'type://host/path'
964# splittype('type:opaquestring') --> 'type', 'opaquestring'
965# splithost('//host[:port]/path') --> 'host[:port]', '/path'
966# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
967# splitpasswd('user:passwd') -> 'user', 'passwd'
968# splitport('host:port') --> 'host', 'port'
969# splitquery('/path?query') --> '/path', 'query'
970# splittag('/path#tag') --> '/path', 'tag'
971# splitattr('/path;attr1=value1;attr2=value2;...') ->
972#   '/path', ['attr1=value1', 'attr2=value2', ...]
973# splitvalue('attr=value') --> 'attr', 'value'
974# splitgophertype('/Xselector') --> 'X', 'selector'
975# unquote('abc%20def') -> 'abc def'
976# quote('abc def') -> 'abc%20def'
977
978try:
979    unicode
980except NameError:
981    def _is_unicode(x):
982        return 0
983else:
984    def _is_unicode(x):
985        return isinstance(x, unicode)
986
987def toBytes(url):
988    """toBytes(u"URL") --> 'URL'."""
989    # Most URL schemes require ASCII. If that changes, the conversion
990    # can be relaxed
991    if _is_unicode(url):
992        try:
993            url = url.encode("ASCII")
994        except UnicodeError:
995            raise UnicodeError("URL " + repr(url) +
996                               " contains non-ASCII characters")
997    return url
998
999def unwrap(url):
1000    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1001    url = url.strip()
1002    if url[:1] == '<' and url[-1:] == '>':
1003        url = url[1:-1].strip()
1004    if url[:4] == 'URL:': url = url[4:].strip()
1005    return url
1006
1007_typeprog = None
1008def splittype(url):
1009    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1010    global _typeprog
1011    if _typeprog is None:
1012        import re
1013        _typeprog = re.compile('^([^/:]+):')
1014
1015    match = _typeprog.match(url)
1016    if match:
1017        scheme = match.group(1)
1018        return scheme.lower(), url[len(scheme) + 1:]
1019    return None, url
1020
1021_hostprog = None
1022def splithost(url):
1023    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1024    global _hostprog
1025    if _hostprog is None:
1026        import re
1027        _hostprog = re.compile('^//([^/?]*)(.*)$')
1028
1029    match = _hostprog.match(url)
1030    if match: return match.group(1, 2)
1031    return None, url
1032
1033_userprog = None
1034def splituser(host):
1035    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1036    global _userprog
1037    if _userprog is None:
1038        import re
1039        _userprog = re.compile('^(.*)@(.*)$')
1040
1041    match = _userprog.match(host)
1042    if match: return map(unquote, match.group(1, 2))
1043    return None, host
1044
1045_passwdprog = None
1046def splitpasswd(user):
1047    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1048    global _passwdprog
1049    if _passwdprog is None:
1050        import re
1051        _passwdprog = re.compile('^([^:]*):(.*)$')
1052
1053    match = _passwdprog.match(user)
1054    if match: return match.group(1, 2)
1055    return user, None
1056
1057# splittag('/path#tag') --> '/path', 'tag'
1058_portprog = None
1059def splitport(host):
1060    """splitport('host:port') --> 'host', 'port'."""
1061    global _portprog
1062    if _portprog is None:
1063        import re
1064        _portprog = re.compile('^(.*):([0-9]+)$')
1065
1066    match = _portprog.match(host)
1067    if match: return match.group(1, 2)
1068    return host, None
1069
1070_nportprog = None
1071def splitnport(host, defport=-1):
1072    """Split host and port, returning numeric port.
1073    Return given default port if no ':' found; defaults to -1.
1074    Return numerical port if a valid number is found after ':'.
1075    Return None if ':' is present but not followed by a valid number."""
1076    global _nportprog
1077    if _nportprog is None:
1078        import re
1079        _nportprog = re.compile('^(.*):(.*)$')
1080
1081    match = _nportprog.match(host)
1082    if match:
1083        host, port = match.group(1, 2)
1084        try:
1085            if not port: raise ValueError, "no digits"
1086            nport = int(port)
1087        except ValueError:
1088            nport = None
1089        return host, nport
1090    return host, defport
1091
1092_queryprog = None
1093def splitquery(url):
1094    """splitquery('/path?query') --> '/path', 'query'."""
1095    global _queryprog
1096    if _queryprog is None:
1097        import re
1098        _queryprog = re.compile('^(.*)\?([^?]*)$')
1099
1100    match = _queryprog.match(url)
1101    if match: return match.group(1, 2)
1102    return url, None
1103
1104_tagprog = None
1105def splittag(url):
1106    """splittag('/path#tag') --> '/path', 'tag'."""
1107    global _tagprog
1108    if _tagprog is None:
1109        import re
1110        _tagprog = re.compile('^(.*)#([^#]*)$')
1111
1112    match = _tagprog.match(url)
1113    if match: return match.group(1, 2)
1114    return url, None
1115
1116def splitattr(url):
1117    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1118        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1119    words = url.split(';')
1120    return words[0], words[1:]
1121
1122_valueprog = None
1123def splitvalue(attr):
1124    """splitvalue('attr=value') --> 'attr', 'value'."""
1125    global _valueprog
1126    if _valueprog is None:
1127        import re
1128        _valueprog = re.compile('^([^=]*)=(.*)$')
1129
1130    match = _valueprog.match(attr)
1131    if match: return match.group(1, 2)
1132    return attr, None
1133
1134def splitgophertype(selector):
1135    """splitgophertype('/Xselector') --> 'X', 'selector'."""
1136    if selector[:1] == '/' and selector[1:2]:
1137        return selector[1], selector[2:]
1138    return None, selector
1139
1140_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1141_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1142
1143def unquote(s):
1144    """unquote('abc%20def') -> 'abc def'."""
1145    res = s.split('%')
1146    for i in xrange(1, len(res)):
1147        item = res[i]
1148        try:
1149            res[i] = _hextochr[item[:2]] + item[2:]
1150        except KeyError:
1151            res[i] = '%' + item
1152        except UnicodeDecodeError:
1153            res[i] = unichr(int(item[:2], 16)) + item[2:]
1154    return "".join(res)
1155
1156def unquote_plus(s):
1157    """unquote('%7e/abc+def') -> '~/abc def'"""
1158    s = s.replace('+', ' ')
1159    return unquote(s)
1160
1161always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1162               'abcdefghijklmnopqrstuvwxyz'
1163               '0123456789' '_.-')
1164_safemaps = {}
1165
1166def quote(s, safe = '/'):
1167    """quote('abc def') -> 'abc%20def'
1168
1169    Each part of a URL, e.g. the path info, the query, etc., has a
1170    different set of reserved characters that must be quoted.
1171
1172    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1173    the following reserved characters.
1174
1175    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1176                  "$" | ","
1177
1178    Each of these characters is reserved in some component of a URL,
1179    but not necessarily in all of them.
1180
1181    By default, the quote function is intended for quoting the path
1182    section of a URL.  Thus, it will not encode '/'.  This character
1183    is reserved, but in typical usage the quote function is being
1184    called on a path where the existing slash characters are used as
1185    reserved characters.
1186    """
1187    cachekey = (safe, always_safe)
1188    try:
1189        safe_map = _safemaps[cachekey]
1190    except KeyError:
1191        safe += always_safe
1192        safe_map = {}
1193        for i in range(256):
1194            c = chr(i)
1195            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1196        _safemaps[cachekey] = safe_map
1197    res = map(safe_map.__getitem__, s)
1198    return ''.join(res)
1199
1200def quote_plus(s, safe = ''):
1201    """Quote the query fragment of a URL; replacing ' ' with '+'"""
1202    if ' ' in s:
1203        s = quote(s, safe + ' ')
1204        return s.replace(' ', '+')
1205    return quote(s, safe)
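# Illustrative difference between quote() and quote_plus():
#     quote('a b&c')      -> 'a%20b%26c'
#     quote_plus('a b&c') -> 'a+b%26c'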
1206
1207def urlencode(query,doseq=0):
1208    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1209
1210    If any values in the query arg are sequences and doseq is true, each
1211    sequence element is converted to a separate parameter.
1212
1213    If the query arg is a sequence of two-element tuples, the order of the
1214    parameters in the output will match the order of parameters in the
1215    input.
1216    """
1217
1218    if hasattr(query,"items"):
1219        # mapping objects
1220        query = query.items()
1221    else:
1222        # it's a bother at times that strings and string-like objects are
1223        # sequences...
1224        try:
1225            # non-sequence items should not work with len()
1226            # non-empty strings will fail this
1227            if len(query) and not isinstance(query[0], tuple):
1228                raise TypeError
1229            # zero-length sequences of all types will get here and succeed,
1230            # but that's a minor nit - since the original implementation
1231            # allowed empty dicts that type of behavior probably should be
1232            # preserved for consistency
1233        except TypeError:
1234            ty,va,tb = sys.exc_info()
1235            raise TypeError, "not a valid non-string sequence or mapping object", tb
1236
1237    l = []
1238    if not doseq:
1239        # preserve old behavior
1240        for k, v in query:
1241            k = quote_plus(str(k))
1242            v = quote_plus(str(v))
1243            l.append(k + '=' + v)
1244    else:
1245        for k, v in query:
1246            k = quote_plus(str(k))
1247            if isinstance(v, str):
1248                v = quote_plus(v)
1249                l.append(k + '=' + v)
1250            elif _is_unicode(v):
1251                # is there a reasonable way to convert to ASCII?
1252                # encode generates a string, but "replace" or "ignore"
1253                # lose information and "strict" can raise UnicodeError
1254                v = quote_plus(v.encode("ASCII","replace"))
1255                l.append(k + '=' + v)
1256            else:
1257                try:
1258                    # is this a sufficient test for sequence-ness?
1259                    x = len(v)
1260                except TypeError:
1261                    # not a sequence
1262                    v = quote_plus(str(v))
1263                    l.append(k + '=' + v)
1264                else:
1265                    # loop over the sequence
1266                    for elt in v:
1267                        l.append(k + '=' + quote_plus(str(elt)))
1268    return '&'.join(l)
1269
1270# Proxy handling
1271def getproxies_environment():
1272    """Return a dictionary of scheme -> proxy server URL mappings.
1273
1274    Scan the environment for variables named <scheme>_proxy;
1275    this seems to be the standard convention.  If you need a
1276    different way, you can pass a proxies dictionary to the
1277    [Fancy]URLopener constructor.
1278
1279    """
1280    proxies = {}
1281    for name, value in os.environ.items():
1282        name = name.lower()
1283        if value and name[-6:] == '_proxy':
1284            proxies[name[:-6]] = value
1285    return proxies
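# For example (illustrative host), with http_proxy=http://proxy.example.com:8080
# set in the environment, getproxies_environment() returns
# {'http': 'http://proxy.example.com:8080'}.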
1286
1287if sys.platform == 'darwin':
1288    def getproxies_internetconfig():
1289        """Return a dictionary of scheme -> proxy server URL mappings.
1290
1291        By convention the mac uses Internet Config to store
1292        proxies.  An HTTP proxy, for instance, is stored under
1293        the HttpProxy key.
1294
1295        """
1296        try:
1297            import ic
1298        except ImportError:
1299            return {}
1300
1301        try:
1302            config = ic.IC()
1303        except ic.error:
1304            return {}
1305        proxies = {}
1306        # HTTP:
1307        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1308            try:
1309                value = config['HTTPProxyHost']
1310            except ic.error:
1311                pass
1312            else:
1313                proxies['http'] = 'http://%s' % value
1314        # FTP: XXXX To be done.
1315        # Gopher: XXXX To be done.
1316        return proxies
1317
1318    def proxy_bypass(x):
1319        return 0
1320
1321    def getproxies():
1322        return getproxies_environment() or getproxies_internetconfig()
1323
1324elif os.name == 'nt':
1325    def getproxies_registry():
1326        """Return a dictionary of scheme -> proxy server URL mappings.
1327
1328        Win32 uses the registry to store proxies.
1329
1330        """
1331        proxies = {}
1332        try:
1333            import _winreg
1334        except ImportError:
1335            # Std module, so should be around - but you never know!
1336            return proxies
1337        try:
1338            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1339                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1340            proxyEnable = _winreg.QueryValueEx(internetSettings,
1341                                               'ProxyEnable')[0]
1342            if proxyEnable:
1343                # Returned as Unicode; convert to ASCII to avoid problems later
1344                proxyServer = str(_winreg.QueryValueEx(internetSettings,
1345                                                       'ProxyServer')[0])
1346                if '=' in proxyServer:
1347                    # Per-protocol settings
1348                    for p in proxyServer.split(';'):
1349                        protocol, address = p.split('=', 1)
1350                        # See if address has a type:// prefix
1351                        import re
1352                        if not re.match('^([^/:]+)://', address):
1353                            address = '%s://%s' % (protocol, address)
1354                        proxies[protocol] = address
1355                else:
1356                    # Use one setting for all protocols
1357                    if proxyServer[:5] == 'http:':
1358                        proxies['http'] = proxyServer
1359                    else:
1360                        proxies['http'] = 'http://%s' % proxyServer
1361                        proxies['ftp'] = 'ftp://%s' % proxyServer
1362            internetSettings.Close()
1363        except (WindowsError, ValueError, TypeError):
1364            # Either the registry key was not found, or the value was in
1365            # an unexpected format.
1366            # proxies already set up to be empty so nothing to do
1367            pass
1368        return proxies
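    # A per-protocol ProxyServer registry value, as parsed above, looks
    # roughly like this (illustrative):
    #     http=proxy.example.com:8080;ftp=ftp-proxy.example.com:2121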
1369
1370    def getproxies():
1371        """Return a dictionary of scheme -> proxy server URL mappings.
1372
1373        Returns settings gathered from the environment, if specified,
1374        or the registry.
1375
1376        """
1377        return getproxies_environment() or getproxies_registry()
1378
1379    def proxy_bypass(host):
1380        try:
1381            import _winreg
1382            import re
1383        except ImportError:
1384            # Std modules, so should be around - but you never know!
1385            return 0
1386        try:
1387            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1388                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1389            proxyEnable = _winreg.QueryValueEx(internetSettings,
1390                                               'ProxyEnable')[0]
1391            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1392                                                     'ProxyOverride')[0])
1393            # ^^^^ Returned as Unicode; convert to ASCII to avoid problems later
1394        except WindowsError:
1395            return 0
1396        if not proxyEnable or not proxyOverride:
1397            return 0
1398        # try to make a host list from name and IP address.
1399        rawHost, port = splitport(host)
1400        host = [rawHost]
1401        try:
1402            addr = socket.gethostbyname(rawHost)
1403            if addr != rawHost:
1404                host.append(addr)
1405        except socket.error:
1406            pass
1407        try:
1408            fqdn = socket.getfqdn(rawHost)
1409            if fqdn != rawHost:
1410                host.append(fqdn)
1411        except socket.error:
1412            pass
1413        # make a check value list from the registry entry: replace the
1414        # '<local>' string by the localhost entry and the corresponding
1415        # canonical entry.
1416        proxyOverride = proxyOverride.split(';')
1417        i = 0
1418        while i < len(proxyOverride):
1419            if proxyOverride[i] == '<local>':
1420                proxyOverride[i:i+1] = ['localhost',
1421                                        '127.0.0.1',
1422                                        socket.gethostname(),
1423                                        socket.gethostbyname(
1424                                            socket.gethostname())]
1425            i += 1
1426        # print proxyOverride
1427        # now check if we match one of the registry values.
1428        for test in proxyOverride:
1429            test = test.replace(".", r"\.")     # mask dots
1430            test = test.replace("*", r".*")     # change glob sequence
1431            test = test.replace("?", r".")      # change glob char
1432            for val in host:
1433                # print "%s <--> %s" %( test, val )
1434                if re.match(test, val, re.I):
1435                    return 1
1436        return 0
1437
1438else:
1439    # By default use environment variables
1440    getproxies = getproxies_environment
1441
1442    def proxy_bypass(host):
1443        return 0
1444
1445# Test and time quote() and unquote()
1446def test1():
1447    s = ''
1448    for i in range(256): s = s + chr(i)
1449    s = s*4
1450    t0 = time.time()
1451    qs = quote(s)
1452    uqs = unquote(qs)
1453    t1 = time.time()
1454    if uqs != s:
1455        print 'Wrong!'
1456    print repr(s)
1457    print repr(qs)
1458    print repr(uqs)
1459    print round(t1 - t0, 3), 'sec'
1460
1461
1462def reporthook(blocknum, blocksize, totalsize):
1463    # Report during remote transfers
1464    print "Block number: %d, Block size: %d, Total size: %d" % (
1465        blocknum, blocksize, totalsize)
1466
1467# Test program
1468def test(args=[]):
1469    if not args:
1470        args = [
1471            '/etc/passwd',
1472            'file:/etc/passwd',
1473            'file://localhost/etc/passwd',
1474            'ftp://ftp.python.org/pub/python/README',
1475##          'gopher://gopher.micro.umn.edu/1/',
1476            'http://www.python.org/index.html',
1477            ]
1478        if hasattr(URLopener, "open_https"):
1479            args.append('https://synergy.as.cmu.edu/~geek/')
1480    try:
1481        for url in args:
1482            print '-'*10, url, '-'*10
1483            fn, h = urlretrieve(url, None, reporthook)
1484            print fn
1485            if h:
1486                print '======'
1487                for k in h.keys(): print k + ':', h[k]
1488                print '======'
1489            fp = open(fn, 'rb')
1490            data = fp.read()
1491            del fp
1492            if '\r' in data:
1493                table = string.maketrans("", "")
1494                data = data.translate(table, "\r")
1495            print data
1496            fn, h = None, None
1497        print '-'*40
1498    finally:
1499        urlcleanup()
1500
1501def main():
1502    import getopt, sys
1503    try:
1504        opts, args = getopt.getopt(sys.argv[1:], "th")
1505    except getopt.error, msg:
1506        print msg
1507        print "Use -h for help"
1508        return
1509    t = 0
1510    for o, a in opts:
1511        if o == '-t':
1512            t = t + 1
1513        if o == '-h':
1514            print "Usage: python urllib.py [-t] [url ...]"
1515            print "-t runs self-test;",
1516            print "otherwise, contents of urls are printed"
1517            return
1518    if t:
1519        if t > 1:
1520            test1()
1521        test(args)
1522    else:
1523        if not args:
1524            print "Use -h for help"
1525        for url in args:
1526            print urlopen(url).read(),
1527
1528# Run test program when run as a script
1529if __name__ == '__main__':
1530    main()
1531