urllib.py revision b42c53e442b211d0ded1d4c9abd18c74d29ed663
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that is has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""

import string
import socket
import os
import time
import sys
from urlparse import urljoin as basejoin

__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from."""
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    if _urlopener:
        _urlopener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()
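
# Illustrative sketch (not part of the original module): urlretrieve() with a
# progress callback.  The URL and filename are placeholders; the hook is
# called as reporthook(blocknum, blocksize, totalsize), matching retrieve()
# below.
#
#   >>> def report(blocknum, blocksize, totalsize):
#   ...     print blocknum * blocksize, 'of', totalsize, 'bytes'
#   >>> filename, headers = urlretrieve('http://www.example.com/f.txt',
#   ...                                 'f.txt', reporthook=report)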

# check for SSL
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True

# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content

ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(toBytes(fullurl))
        # percent-encode the URL, working around lame server errors,
        # e.g. a literal space within a URL path
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            raise IOError, ('socket error', msg), sys.exc_info()[2]

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if reporthook:
                    if "content-length" in headers:
                        size = int(headers["Content-Length"])
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result
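
    # Illustrative note (not in the original source): opting in to the
    # undocumented tempcache feature mentioned in __init__ makes repeated
    # retrieve() calls for the same URL reuse the first download.
    #
    #   >>> opener = URLopener()
    #   >>> opener.tempcache = {}                      # enable caching
    #   >>> opener.retrieve('http://www.example.com/') # fetched once
    #   >>> opener.retrieve('http://www.example.com/') # served from the cache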

    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        import httplib
        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        void = fp.read()
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)

    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol."""

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                host, selector = url
                # check whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError, ('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        f = StringIO(msg)
        headers = mimetools.Message(f, 0)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
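
    # Illustrative example (not in the original source): a data URL opened
    # through URLopener.open() is routed to open_data() and parsed with the
    # grammar above.
    #
    #   >>> f = URLopener().open('data:text/plain;base64,aGVsbG8=')
    #   >>> f.read()
    #   'hello'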


class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
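
# Illustrative sketch (not in the original source): a GUI or non-interactive
# client would typically subclass FancyURLopener and override
# prompt_user_passwd() instead of reading from the terminal.  The credentials
# and URL shown are placeholders.
#
#   >>> class MyOpener(FancyURLopener):
#   ...     def prompt_user_passwd(self, host, realm):
#   ...         return 'user', 'secret'
#   >>> f = MyOpener().open('http://www.example.com/protected/')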


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders


# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=False):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass

class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None

class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if self.closehook:
            self.closehook(*self.hookargs)
            self.closehook = None
            self.hookargs = None

class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        return self.headers

class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        return self.headers

    def getcode(self):
        return self.code

    def geturl(self):
        return self.url


# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
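#
# Illustrative examples (not in the original source) of the helpers listed
# above:
#
#   >>> splittype('http://www.example.com/index.html')
#   ('http', '//www.example.com/index.html')
#   >>> splithost('//user:pw@www.example.com:8080/index.html')
#   ('user:pw@www.example.com:8080', '/index.html')
#   >>> splituser('user:pw@www.example.com:8080')
#   ('user:pw', 'www.example.com:8080')
#   >>> splitport('www.example.com:8080')
#   ('www.example.com', '8080')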

try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    if _is_unicode(url):
        try:
            url = url.encode("ASCII")
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:': url = url[4:].strip()
    return url

_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url

_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host

_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$', re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None

# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match: return match.group(1, 2)
    return host, None

_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            if not port: raise ValueError, "no digits"
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport
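
# Illustrative examples (not in the original source):
#
#   >>> splitnport('www.example.com:8080')
#   ('www.example.com', 8080)
#   >>> splitnport('www.example.com')
#   ('www.example.com', -1)
#   >>> splitnport('www.example.com:spam')
#   ('www.example.com', None)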

_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None

_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None

# urlparse contains a duplicate of this method to avoid a circular import.  If
# you update this method, also update the copy in urlparse.  This code
# duplication does not exist in Python3.

_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    res = s.split('%')
    # fastpath
    if len(res) == 1:
        return s
    s = res[0]
    for item in res[1:]:
        try:
            s += _hextochr[item[:2]] + item[2:]
        except KeyError:
            s += '%' + item
        except UnicodeDecodeError:
            s += unichr(int(item[:2], 16)) + item[2:]
    return s
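
# Illustrative examples (not in the original source); note that a malformed
# escape is passed through unchanged:
#
#   >>> unquote('abc%20def')
#   'abc def'
#   >>> unquote('abc%zzdef')
#   'abc%zzdef'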

def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'."""
    s = s.replace('+', ' ')
    return unquote(s)

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    try:
        (quoter, safe) = _safe_quoters[cachekey]
    except KeyError:
        safe_map = _safe_map.copy()
        safe_map.update([(c, c) for c in safe])
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))
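
# Illustrative examples (not in the original source); '~' and ' ' are not in
# always_safe, so they are escaped unless passed via the safe argument:
#
#   >>> quote('/~user/file name.txt')
#   '/%7Euser/file%20name.txt'
#   >>> quote('/~user/file name.txt', safe='/~ ')
#   '/~user/file name.txt'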

def quote_plus(s, safe=''):
    """Quote the query fragment of a URL, replacing ' ' with '+'."""
    if ' ' in s:
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)

def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII", "replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
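
# Illustrative examples (not in the original source):
#
#   >>> urlencode([('q', 'spam & eggs'), ('page', 2)])
#   'q=spam+%26+eggs&page=2'
#   >>> urlencode({'tag': ['a', 'b']}, doseq=1)
#   'tag=a&tag=b'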

# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies

def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is a special case that means "always bypass"
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
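
# Illustrative example (not in the original source): with only
# http_proxy="http://proxy.example.com:3128" set in the environment,
# getproxies_environment() returns {'http': 'http://proxy.example.com:3128'};
# with no_proxy="example.com" set as well, proxy_bypass_environment() returns
# 1 for any host ending in that suffix.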
1374
1375
1376if sys.platform == 'darwin':
1377    from _scproxy import _get_proxy_settings, _get_proxies
1378
1379    def proxy_bypass_macosx_sysconf(host):
1380        """
1381        Return True iff this host shouldn't be accessed using a proxy
1382
1383        This function uses the MacOSX framework SystemConfiguration
1384        to fetch the proxy information.
1385        """
1386        import re
1387        import socket
1388        from fnmatch import fnmatch
1389
1390        hostonly, port = splitport(host)
1391
1392        def ip2num(ipAddr):
1393            parts = ipAddr.split('.')
1394            parts = map(int, parts)
1395            if len(parts) != 4:
1396                parts = (parts + [0, 0, 0, 0])[:4]
1397            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1398
1399        proxy_settings = _get_proxy_settings()
1400
1401        # Check for simple host names:
1402        if '.' not in host:
1403            if proxy_settings['exclude_simple']:
1404                return True
1405
1406        hostIP = None
1407
1408        for value in proxy_settings.get('exceptions', ()):
1409            # Items in the list are strings like these: *.local, 169.254/16
1410            if not value: continue
1411
1412            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1413            if m is not None:
1414                if hostIP is None:
1415                    try:
1416                        hostIP = socket.gethostbyname(hostonly)
1417                        hostIP = ip2num(hostIP)
1418                    except socket.error:
1419                        continue
1420
1421                base = ip2num(m.group(1))
1422                mask = m.group(2)
1423                if mask is None:
1424                    mask = 8 * (m.group(1).count('.') + 1)
1425
1426                else:
1427                    mask = int(mask[1:])
1428                mask = 32 - mask
1429
1430                if (hostIP >> mask) == (base >> mask):
1431                    return True
1432
1433            elif fnmatch(host, value):
1434                return True
1435
1436        return False
1437
1438    def getproxies_macosx_sysconf():
1439        """Return a dictionary of scheme -> proxy server URL mappings.
1440
1441        This function uses the MacOSX framework SystemConfiguration
1442        to fetch the proxy information.
1443        """
1444        return _get_proxies()

    def proxy_bypass(host):
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # The registry value is returned as Unicode; convert it to a
                # plain (ASCII) string to avoid problems downstream.
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
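                # ProxyServer is either a single "host:port" used for every
                # protocol, or a per-protocol list such as
                # "http=proxy:80;https=proxy:443;ftp=proxy:21" (illustrative).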
                if '=' in proxyServer:
                    # Per-protocol settings
                    import re
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either the registry key was not found, or one of the values
            # was in an unexpected format.  proxies is already empty, so
            # there is nothing more to do.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
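        """Return 1 if the registry's ProxyOverride list says the host
        should be accessed directly (bypassing the proxy), else 0."""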
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ProxyOverride is returned as Unicode; str() above converts it
            # to a plain (ASCII) string to avoid problems downstream.
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # Split the registry entry into a list of override patterns.  The
        # special entry '<local>' matches any host name without a dot
        # (e.g. 'intranet'); other entries may use the '*' and '?' wildcards
        # (e.g. '*.example.com').
        proxyOverride = proxyOverride.split(';')
        # Check whether any of the patterns matches the host name, its IP
        # address or its fully qualified name.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return 1 if the host should be accessed without a proxy, else 0.

        Checks the no_proxy environment variable when proxy settings come
        from the environment, and the registry's ProxyOverride list
        otherwise.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
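# Example (illustrative): with http_proxy="http://proxy.example.com:3128" in
# the environment, getproxies() returns
# {'http': 'http://proxy.example.com:3128'}; proxy_bypass() then consults the
# no_proxy / NO_PROXY variable to decide which hosts skip the proxy.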

# Test and time quote() and unquote()
def test1():
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print repr(s)
    print repr(qs)
    print repr(uqs)
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
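    # urlretrieve() calls this once before the first block and then after
    # each block read; totalsize is -1 when the server did not report a
    # Content-Length.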
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)

# Test program
def test(args=[]):
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            with open(fn, 'rb') as fp:
                data = fp.read()
            if '\r' in data:
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
        print '-'*40
    finally:
        urlcleanup()

def main():
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            print urlopen(url).read(),

# Run test program when run as a script
if __name__ == '__main__':
    main()