14adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"""Open an arbitrary URL.
24adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
34adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoSee the following document for more info on URLs:
44adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"Names and Addresses, URIs, URLs, URNs, URCs", at
54adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaohttp://www.w3.org/pub/WWW/Addressing/Overview.html
64adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
74adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoSee also the HTTP spec (from which the error codes are derived):
84adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"HTTP - Hypertext Transfer Protocol", at
94adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaohttp://www.w3.org/pub/WWW/Protocols/
104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
114adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoRelated standards and specs:
124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao- RFC1808: the "relative URL" spec. (authoritative status)
134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao- RFC1738 - the "URL standard". (authoritative status)
144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao- RFC1630 - the "URI spec". (informational status)
154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
164adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoThe object returned by URLopener().open(file) will differ per
174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoprotocol.  All you know is that is has methods read(), readline(),
184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoreadlines(), fileno(), close() and info().  The read*(), fileno()
194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoand close() methods work like those of open files.
204adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoThe info() method returns a mimetools.Message object which can be
214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoused to query various info about the object, if available.
224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao(mimetools.Message objects are queried with the getheader() method.)
234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"""
244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport string
264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport socket
274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport os
284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport time
294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport sys
304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport base64
314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport re
324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom urlparse import urljoin as basejoin
344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           "urlencode", "url2pathname", "pathname2url", "splittag",
384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           "splitnport", "splitquery", "splitattr", "splitvalue",
414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           "getproxies"]
424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao__version__ = '1.17'    # XXX This version is not always updated :-(
444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
454adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoMAXFTPCACHE = 10        # Trim the ftp cache beyond this size
464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Helper for non-unix systems
484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoif os.name == 'nt':
494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    from nturl2path import url2pathname, pathname2url
504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelif os.name == 'riscos':
514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    from rourl2path import url2pathname, pathname2url
524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelse:
534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def url2pathname(pathname):
544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """OS-specific conversion from a relative URL of the 'file' scheme
554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        to a file system path; not recommended for general use."""
564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return unquote(pathname)
574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def pathname2url(pathname):
594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """OS-specific conversion from a file system path to a relative URL
604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        of the 'file' scheme; not recommended for general use."""
614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return quote(pathname)
624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# This really consists of two pieces:
644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# (1) a class which handles opening of all sorts of URLs
654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao#     (plus assorted utilities etc.)
664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# (2) a set of functions for parsing URLs
674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# XXX Should these be separated out into different modules?
684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Shortcut for basic usage
714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_urlopener = None
724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef urlopen(url, data=None, proxies=None):
734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Create a file-like object for the specified URL to read from."""
744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    from warnings import warnpy3k
754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao             "favor of urllib2.urlopen()", stacklevel=2)
774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _urlopener
794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if proxies is not None:
804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        opener = FancyURLopener(proxies=proxies)
814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    elif not _urlopener:
824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        opener = FancyURLopener()
834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _urlopener = opener
844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    else:
854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        opener = _urlopener
864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if data is None:
874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return opener.open(url)
884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    else:
894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return opener.open(url, data)
904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef urlretrieve(url, filename=None, reporthook=None, data=None):
914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _urlopener
924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not _urlopener:
934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _urlopener = FancyURLopener()
944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return _urlopener.retrieve(url, filename, reporthook, data)
954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef urlcleanup():
964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _urlopener:
974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _urlopener.cleanup()
984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    _safe_quoters.clear()
994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    ftpcache.clear()
1004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# check for SSL
1024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaotry:
1034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    import ssl
1044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoexcept:
1054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    _have_ssl = False
1064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelse:
1074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    _have_ssl = True
1084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# exception raised when downloaded size does not match content-length
1104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass ContentTooShortError(IOError):
1114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, message, content):
1124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        IOError.__init__(self, message)
1134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.content = content
1144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoftpcache = {}
1164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass URLopener:
1174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Class to open URLs.
1184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    This is a class rather than just a subroutine because we may need
1194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    more than one set of global protocol-specific options.
1204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Note -- this is a base class for those who don't want the
1214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    automatic handling of errors type 302 (relocated) and 401
1224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    (authorization needed)."""
1234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    __tempfiles = None
1254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    version = "Python-urllib/%s" % __version__
1274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Constructor
1294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, proxies=None, **x509):
1304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if proxies is None:
1314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxies = getproxies()
1324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
1334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.proxies = proxies
1344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.key_file = x509.get('key_file')
1354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.cert_file = x509.get('cert_file')
1364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.addheaders = [('User-Agent', self.version)]
1374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.__tempfiles = []
1384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.__unlink = os.unlink # See cleanup()
1394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.tempcache = None
1404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Undocumented feature: if you assign {} to tempcache,
1414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # it is used to cache files retrieved with
1424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # self.retrieve().  This is not enabled by default
1434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # since it does not work for changing documents (and I
1444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # haven't got the logic to check expiration headers
1454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # yet).
1464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.ftpcache = ftpcache
1474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Undocumented feature: you can use a different
1484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # ftp cache by assigning to the .ftpcache member;
1494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # in case you want logically independent URL openers
1504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # XXX This is not threadsafe.  Bah.
1514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __del__(self):
1534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.close()
1544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def close(self):
1564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.cleanup()
1574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def cleanup(self):
1594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # This code sometimes runs when the rest of this module
1604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # has already been deleted, so it can't use any globals
1614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # or import anything.
1624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.__tempfiles:
1634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for file in self.__tempfiles:
1644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                try:
1654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    self.__unlink(file)
1664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                except OSError:
1674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    pass
1684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            del self.__tempfiles[:]
1694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.tempcache:
1704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.tempcache.clear()
1714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def addheader(self, *args):
1734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Add a header to be used by the HTTP interface only
1744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        e.g. u.addheader('Accept', 'sound/basic')"""
1754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.addheaders.append(args)
1764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # External interface
1784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open(self, fullurl, data=None):
1794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Use URLopener().open(file) instead of open(file, 'r')."""
1804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        fullurl = unwrap(toBytes(fullurl))
1814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # percent encode url, fixing lame server errors for e.g, like space
1824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # within url paths.
1834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
1844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.tempcache and fullurl in self.tempcache:
1854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            filename, headers = self.tempcache[fullurl]
1864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fp = open(filename, 'rb')
1874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return addinfourl(fp, headers, fullurl)
1884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        urltype, url = splittype(fullurl)
1894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not urltype:
1904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            urltype = 'file'
1914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if urltype in self.proxies:
1924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy = self.proxies[urltype]
1934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            urltype, proxyhost = splittype(proxy)
1944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            host, selector = splithost(proxyhost)
1954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            url = (host, fullurl) # Signal special case to open_*()
1964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
1974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy = None
1984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name = 'open_' + urltype
1994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.type = urltype
2004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name = name.replace('-', '_')
2014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not hasattr(self, name):
2024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if proxy:
2034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return self.open_unknown_proxy(proxy, fullurl, data)
2044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
2054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return self.open_unknown(fullurl, data)
2064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
2074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if data is None:
2084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return getattr(self, name)(url)
2094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
2104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return getattr(self, name)(url, data)
2114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except socket.error, msg:
2124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('socket error', msg), sys.exc_info()[2]
2134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_unknown(self, fullurl, data=None):
2154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Overridable interface to open unknown URL type."""
2164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        type, url = splittype(fullurl)
2174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        raise IOError, ('url error', 'unknown url type', type)
2184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_unknown_proxy(self, proxy, fullurl, data=None):
2204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Overridable interface to open unknown URL type."""
2214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        type, url = splittype(fullurl)
2224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
2234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # External interface
2254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def retrieve(self, url, filename=None, reporthook=None, data=None):
2264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """retrieve(url) returns (filename, headers) for a local object
2274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        or (tempfilename, headers) for a remote object."""
2284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        url = unwrap(toBytes(url))
2294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.tempcache and url in self.tempcache:
2304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.tempcache[url]
2314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        type, url1 = splittype(url)
2324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if filename is None and (not type or type == 'file'):
2334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            try:
2344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                fp = self.open_local_file(url1)
2354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                hdrs = fp.info()
2364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                fp.close()
2374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return url2pathname(splithost(url1)[1]), hdrs
2384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            except IOError:
2394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                pass
2404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        fp = self.open(url, data)
2414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
2424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            headers = fp.info()
2434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if filename:
2444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                tfp = open(filename, 'wb')
2454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
2464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                import tempfile
2474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                garbage, path = splittype(url)
2484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                garbage, path = splithost(path or "")
2494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                path, garbage = splitquery(path or "")
2504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                path, garbage = splitattr(path or "")
2514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                suffix = os.path.splitext(path)[1]
2524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                (fd, filename) = tempfile.mkstemp(suffix)
2534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                self.__tempfiles.append(filename)
2544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                tfp = os.fdopen(fd, 'wb')
2554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            try:
2564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                result = filename, headers
2574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if self.tempcache is not None:
2584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    self.tempcache[url] = result
2594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                bs = 1024*8
2604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                size = -1
2614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                read = 0
2624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                blocknum = 0
2634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if "content-length" in headers:
2644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    size = int(headers["Content-Length"])
2654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if reporthook:
2664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    reporthook(blocknum, bs, size)
2674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                while 1:
2684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    block = fp.read(bs)
2694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if block == "":
2704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        break
2714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    read += len(block)
2724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    tfp.write(block)
2734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    blocknum += 1
2744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if reporthook:
2754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        reporthook(blocknum, bs, size)
2764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            finally:
2774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                tfp.close()
2784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
2794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fp.close()
2804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # raise exception if actual size does not match content-length header
2824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if size >= 0 and read < size:
2834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise ContentTooShortError("retrieval incomplete: got only %i out "
2844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                       "of %i bytes" % (read, size), result)
2854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return result
2874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Each method named open_<type> knows how to open that type of URL
2894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_http(self, url, data=None):
2914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Use HTTP protocol."""
2924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import httplib
2934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user_passwd = None
2944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxy_passwd= None
2954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if isinstance(url, str):
2964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            host, selector = splithost(url)
2974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if host:
2984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                user_passwd, host = splituser(host)
2994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                host = unquote(host)
3004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            realhost = host
3014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
3024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            host, selector = url
3034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # check whether the proxy contains authorization information
3044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy_passwd, host = splituser(host)
3054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # now we proceed with the url we want to obtain
3064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            urltype, rest = splittype(selector)
3074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            url = rest
3084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            user_passwd = None
3094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if urltype.lower() != 'http':
3104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                realhost = None
3114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
3124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                realhost, rest = splithost(rest)
3134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if realhost:
3144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    user_passwd, realhost = splituser(realhost)
3154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if user_passwd:
3164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    selector = "%s://%s%s" % (urltype, realhost, rest)
3174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if proxy_bypass(realhost):
3184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    host = realhost
3194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #print "proxy via http:", host, selector
3214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not host: raise IOError, ('http error', 'no host given')
3224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if proxy_passwd:
3244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy_passwd = unquote(proxy_passwd)
3254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy_auth = base64.b64encode(proxy_passwd).strip()
3264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
3274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy_auth = None
3284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if user_passwd:
3304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            user_passwd = unquote(user_passwd)
3314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            auth = base64.b64encode(user_passwd).strip()
3324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
3334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            auth = None
3344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        h = httplib.HTTP(host)
3354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is not None:
3364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            h.putrequest('POST', selector)
3374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
3384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            h.putheader('Content-Length', '%d' % len(data))
3394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
3404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            h.putrequest('GET', selector)
3414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
3424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if auth: h.putheader('Authorization', 'Basic %s' % auth)
3434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if realhost: h.putheader('Host', realhost)
3444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for args in self.addheaders: h.putheader(*args)
3454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        h.endheaders(data)
3464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        errcode, errmsg, headers = h.getreply()
3474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        fp = h.getfile()
3484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if errcode == -1:
3494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if fp: fp.close()
3504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # something went wrong with the HTTP status line
3514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('http protocol error', 0,
3524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                            'got a bad status line', None)
3534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # According to RFC 2616, "2xx" code indicates that the client's
3544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # request was successfully received, understood, and accepted.
3554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if (200 <= errcode < 300):
3564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return addinfourl(fp, headers, "http:" + url, errcode)
3574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
3584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if data is None:
3594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return self.http_error(url, fp, errcode, errmsg, headers)
3604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
3614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return self.http_error(url, fp, errcode, errmsg, headers, data)
3624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
3644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Handle http errors.
3654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Derived class can override this, or provide specific handlers
3664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        named http_error_DDD where DDD is the 3-digit error code."""
3674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # First check if there's a specific handler for this error
3684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name = 'http_error_%d' % errcode
3694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if hasattr(self, name):
3704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            method = getattr(self, name)
3714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if data is None:
3724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                result = method(url, fp, errcode, errmsg, headers)
3734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
3744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                result = method(url, fp, errcode, errmsg, headers, data)
3754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if result: return result
3764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.http_error_default(url, fp, errcode, errmsg, headers)
3774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_default(self, url, fp, errcode, errmsg, headers):
3794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Default error handler: close the connection and raise IOError."""
3804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        fp.close()
3814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        raise IOError, ('http error', errcode, errmsg, headers)
3824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _have_ssl:
3844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        def open_https(self, url, data=None):
3854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            """Use HTTPS protocol."""
3864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            import httplib
3884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            user_passwd = None
3894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxy_passwd = None
3904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if isinstance(url, str):
3914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                host, selector = splithost(url)
3924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if host:
3934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    user_passwd, host = splituser(host)
3944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    host = unquote(host)
3954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                realhost = host
3964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
3974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                host, selector = url
3984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # here, we determine, whether the proxy contains authorization information
3994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                proxy_passwd, host = splituser(host)
4004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                urltype, rest = splittype(selector)
4014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                url = rest
4024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                user_passwd = None
4034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if urltype.lower() != 'https':
4044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    realhost = None
4054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
4064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    realhost, rest = splithost(rest)
4074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if realhost:
4084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        user_passwd, realhost = splituser(realhost)
4094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if user_passwd:
4104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        selector = "%s://%s%s" % (urltype, realhost, rest)
4114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                #print "proxy via https:", host, selector
4124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not host: raise IOError, ('https error', 'no host given')
4134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if proxy_passwd:
4144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                proxy_passwd = unquote(proxy_passwd)
4154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                proxy_auth = base64.b64encode(proxy_passwd).strip()
4164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
4174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                proxy_auth = None
4184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if user_passwd:
4194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                user_passwd = unquote(user_passwd)
4204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                auth = base64.b64encode(user_passwd).strip()
4214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
4224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                auth = None
4234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            h = httplib.HTTPS(host, 0,
4244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                              key_file=self.key_file,
4254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                              cert_file=self.cert_file)
4264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if data is not None:
4274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                h.putrequest('POST', selector)
4284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                h.putheader('Content-Type',
4294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                            'application/x-www-form-urlencoded')
4304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                h.putheader('Content-Length', '%d' % len(data))
4314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
4324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                h.putrequest('GET', selector)
4334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
4344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if auth: h.putheader('Authorization', 'Basic %s' % auth)
4354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if realhost: h.putheader('Host', realhost)
4364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for args in self.addheaders: h.putheader(*args)
4374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            h.endheaders(data)
4384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            errcode, errmsg, headers = h.getreply()
4394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fp = h.getfile()
4404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if errcode == -1:
4414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if fp: fp.close()
4424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # something went wrong with the HTTP status line
4434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise IOError, ('http protocol error', 0,
4444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                'got a bad status line', None)
4454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # According to RFC 2616, "2xx" code indicates that the client's
4464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # request was successfully received, understood, and accepted.
4474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if (200 <= errcode < 300):
4484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return addinfourl(fp, headers, "https:" + url, errcode)
4494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
4504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if data is None:
4514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return self.http_error(url, fp, errcode, errmsg, headers)
4524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
4534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return self.http_error(url, fp, errcode, errmsg, headers,
4544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                           data)
4554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
4564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_file(self, url):
4574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Use local file or FTP depending on form of URL."""
4584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not isinstance(url, str):
4594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
4604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
4614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open_ftp(url)
4624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
4634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open_local_file(url)
4644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
4654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_local_file(self, url):
4664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Use local file."""
4674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import mimetypes, mimetools, email.utils
4684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
4694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from cStringIO import StringIO
4704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ImportError:
4714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from StringIO import StringIO
4724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, file = splithost(url)
4734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        localname = url2pathname(file)
4744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
4754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            stats = os.stat(localname)
4764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except OSError, e:
4774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError(e.errno, e.strerror, e.filename)
4784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        size = stats.st_size
4794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
4804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        mtype = mimetypes.guess_type(url)[0]
4814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        headers = mimetools.Message(StringIO(
4824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
4834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            (mtype or 'text/plain', size, modified)))
4844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not host:
4854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            urlfile = file
4864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if file[:1] == '/':
4874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                urlfile = 'file://' + file
4884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif file[:2] == './':
4894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
4904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return addinfourl(open(localname, 'rb'),
4914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                              headers, urlfile)
4924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, port = splitport(host)
4934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not port \
4944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao           and socket.gethostbyname(host) in (localhost(), thishost()):
4954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            urlfile = file
4964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if file[:1] == '/':
4974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                urlfile = 'file://' + file
4984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return addinfourl(open(localname, 'rb'),
4994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                              headers, urlfile)
5004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        raise IOError, ('local file error', 'not on local host')
5014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
5024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_ftp(self, url):
5034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Use FTP protocol."""
5044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not isinstance(url, str):
5054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
5064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import mimetypes, mimetools
5074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
5084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from cStringIO import StringIO
5094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ImportError:
5104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from StringIO import StringIO
5114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, path = splithost(url)
5124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not host: raise IOError, ('ftp error', 'no host given')
5134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, port = splitport(host)
5144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user, host = splituser(host)
5154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if user: user, passwd = splitpasswd(user)
5164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else: passwd = None
5174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = unquote(host)
5184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user = user or ''
5194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        passwd = passwd or ''
5204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = socket.gethostbyname(host)
5214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not port:
5224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            import ftplib
5234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            port = ftplib.FTP_PORT
5244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
5254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            port = int(port)
5264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        path, attrs = splitattr(path)
5274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        path = unquote(path)
5284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        dirs = path.split('/')
5294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        dirs, file = dirs[:-1], dirs[-1]
5304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if dirs and not dirs[0]: dirs = dirs[1:]
5314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if dirs and not dirs[0]: dirs[0] = '/'
5324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        key = user, host, port, '/'.join(dirs)
5334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # XXX thread unsafe!
5344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if len(self.ftpcache) > MAXFTPCACHE:
5354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Prune the cache, rather arbitrarily
5364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for k in self.ftpcache.keys():
5374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if k != key:
5384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    v = self.ftpcache[k]
5394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    del self.ftpcache[k]
5404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    v.close()
5414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
5424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not key in self.ftpcache:
5434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                self.ftpcache[key] = \
5444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    ftpwrapper(user, passwd, host, port, dirs)
5454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not file: type = 'D'
5464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else: type = 'I'
5474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for attr in attrs:
5484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                attr, value = splitvalue(attr)
5494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if attr.lower() == 'type' and \
5504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
5514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    type = value.upper()
5524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
5534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            mtype = mimetypes.guess_type("ftp:" + url)[0]
5544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            headers = ""
5554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if mtype:
5564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                headers += "Content-Type: %s\n" % mtype
5574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if retrlen is not None and retrlen >= 0:
5584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                headers += "Content-Length: %d\n" % retrlen
5594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            headers = mimetools.Message(StringIO(headers))
5604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return addinfourl(fp, headers, "ftp:" + url)
5614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ftperrors(), msg:
5624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('ftp error', msg), sys.exc_info()[2]
5634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
5644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def open_data(self, url, data=None):
5654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Use "data" URL."""
5664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not isinstance(url, str):
5674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
5684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # ignore POSTed data
5694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        #
5704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # syntax of data URLs:
5714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
5724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # mediatype := [ type "/" subtype ] *( ";" parameter )
5734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # data      := *urlchar
5744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # parameter := attribute "=" value
5754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import mimetools
5764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
5774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from cStringIO import StringIO
5784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ImportError:
5794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from StringIO import StringIO
5804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
5814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            [type, data] = url.split(',', 1)
5824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ValueError:
5834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError, ('data error', 'bad data URL')
5844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not type:
5854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            type = 'text/plain;charset=US-ASCII'
5864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        semi = type.rfind(';')
5874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if semi >= 0 and '=' not in type[semi:]:
5884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            encoding = type[semi+1:]
5894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            type = type[:semi]
5904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
5914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            encoding = ''
5924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg = []
5934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
5944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                            time.gmtime(time.time())))
5954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg.append('Content-type: %s' % type)
5964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if encoding == 'base64':
5974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            data = base64.decodestring(data)
5984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
5994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            data = unquote(data)
6004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg.append('Content-Length: %d' % len(data))
6014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg.append('')
6024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg.append(data)
6034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        msg = '\n'.join(msg)
6044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        f = StringIO(msg)
6054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        headers = mimetools.Message(f, 0)
6064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        #f.fileno = None     # needed for addinfourl
6074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return addinfourl(f, headers, url)
6084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass FancyURLopener(URLopener):
6114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Derived class with handlers for errors we can handle (perhaps)."""
6124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, *args, **kwargs):
6144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        URLopener.__init__(self, *args, **kwargs)
6154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.auth_cache = {}
6164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.tries = 0
6174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.maxtries = 10
6184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_default(self, url, fp, errcode, errmsg, headers):
6204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Default error handling -- don't raise an exception."""
6214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return addinfourl(fp, headers, "http:" + url, errcode)
6224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
6244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Error 302 -- relocated (temporarily)."""
6254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.tries += 1
6264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.maxtries and self.tries >= self.maxtries:
6274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if hasattr(self, "http_error_500"):
6284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                meth = self.http_error_500
6294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
6304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                meth = self.http_error_default
6314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.tries = 0
6324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return meth(url, fp, 500,
6334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        "Internal Server Error: Redirect Recursion", headers)
6344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
6354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                        data)
6364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.tries = 0
6374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return result
6384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
6404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if 'location' in headers:
6414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            newurl = headers['location']
6424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif 'uri' in headers:
6434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            newurl = headers['uri']
6444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
6454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return
6464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        fp.close()
6474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # In case the server sent a relative URL, join with original:
6484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        newurl = basejoin(self.type + ":" + url, newurl)
6494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # For security reasons we do not allow redirects to protocols
6514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # other than HTTP, HTTPS or FTP.
6524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        newurl_lower = newurl.lower()
6534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not (newurl_lower.startswith('http://') or
6544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                newurl_lower.startswith('https://') or
6554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                newurl_lower.startswith('ftp://')):
6564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise IOError('redirect error', errcode,
6574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                          errmsg + " - Redirection to url '%s' is not allowed" %
6584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                          newurl,
6594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                          headers)
6604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.open(newurl)
6624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
6644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Error 301 -- also relocated (permanently)."""
6654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
6664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
6684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Error 303 -- also relocated (essentially identical to 302)."""
6694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
6704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
6724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Error 307 -- relocated, but turn POST into error."""
6734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
6744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
6754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
6764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.http_error_default(url, fp, errcode, errmsg, headers)
6774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
6784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
6794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Error 401 -- authentication required.
6804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        This function supports Basic authentication only."""
6814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not 'www-authenticate' in headers:
6824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            URLopener.http_error_default(self, url, fp,
6834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                         errcode, errmsg, headers)
6844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        stuff = headers['www-authenticate']
6854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
6864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
6874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not match:
6884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            URLopener.http_error_default(self, url, fp,
6894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                         errcode, errmsg, headers)
6904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        scheme, realm = match.groups()
6914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if scheme.lower() != 'basic':
6924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            URLopener.http_error_default(self, url, fp,
6934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                         errcode, errmsg, headers)
6944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name = 'retry_' + self.type + '_basic_auth'
6954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
6964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return getattr(self,name)(url, realm)
6974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
6984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return getattr(self,name)(url, realm, data)
6994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
7014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Error 407 -- proxy authentication required.
7024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        This function supports Basic authentication only."""
7034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not 'proxy-authenticate' in headers:
7044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            URLopener.http_error_default(self, url, fp,
7054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                         errcode, errmsg, headers)
7064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        stuff = headers['proxy-authenticate']
7074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
7084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
7094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not match:
7104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            URLopener.http_error_default(self, url, fp,
7114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                         errcode, errmsg, headers)
7124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        scheme, realm = match.groups()
7134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if scheme.lower() != 'basic':
7144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            URLopener.http_error_default(self, url, fp,
7154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                         errcode, errmsg, headers)
7164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name = 'retry_proxy_' + self.type + '_basic_auth'
7174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
7184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return getattr(self,name)(url, realm)
7194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
7204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return getattr(self,name)(url, realm, data)
7214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def retry_proxy_http_basic_auth(self, url, realm, data=None):
7234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, selector = splithost(url)
7244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        newurl = 'http://' + host + selector
7254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxy = self.proxies['http']
7264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        urltype, proxyhost = splittype(proxy)
7274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyhost, proxyselector = splithost(proxyhost)
7284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        i = proxyhost.find('@') + 1
7294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyhost = proxyhost[i:]
7304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user, passwd = self.get_user_passwd(proxyhost, realm, i)
7314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not (user or passwd): return None
7324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
7334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.proxies['http'] = 'http://' + proxyhost + proxyselector
7344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
7354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl)
7364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
7374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl, data)
7384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def retry_proxy_https_basic_auth(self, url, realm, data=None):
7404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, selector = splithost(url)
7414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        newurl = 'https://' + host + selector
7424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxy = self.proxies['https']
7434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        urltype, proxyhost = splittype(proxy)
7444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyhost, proxyselector = splithost(proxyhost)
7454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        i = proxyhost.find('@') + 1
7464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyhost = proxyhost[i:]
7474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user, passwd = self.get_user_passwd(proxyhost, realm, i)
7484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not (user or passwd): return None
7494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
7504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.proxies['https'] = 'https://' + proxyhost + proxyselector
7514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
7524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl)
7534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
7544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl, data)
7554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def retry_http_basic_auth(self, url, realm, data=None):
7574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, selector = splithost(url)
7584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        i = host.find('@') + 1
7594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = host[i:]
7604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user, passwd = self.get_user_passwd(host, realm, i)
7614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not (user or passwd): return None
7624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
7634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        newurl = 'http://' + host + selector
7644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
7654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl)
7664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
7674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl, data)
7684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def retry_https_basic_auth(self, url, realm, data=None):
7704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, selector = splithost(url)
7714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        i = host.find('@') + 1
7724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = host[i:]
7734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user, passwd = self.get_user_passwd(host, realm, i)
7744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not (user or passwd): return None
7754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
7764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        newurl = 'https://' + host + selector
7774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if data is None:
7784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl)
7794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
7804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.open(newurl, data)
7814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def get_user_passwd(self, host, realm, clear_cache=0):
7834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        key = realm + '@' + host.lower()
7844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if key in self.auth_cache:
7854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if clear_cache:
7864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                del self.auth_cache[key]
7874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
7884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return self.auth_cache[key]
7894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        user, passwd = self.prompt_user_passwd(host, realm)
7904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if user or passwd: self.auth_cache[key] = (user, passwd)
7914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return user, passwd
7924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def prompt_user_passwd(self, host, realm):
7944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Override this in a GUI environment!"""
7954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import getpass
7964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
7974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            user = raw_input("Enter username for %s at %s: " % (realm,
7984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                                                host))
7994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
8004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                (user, realm, host))
8014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return user, passwd
8024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except KeyboardInterrupt:
8034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            print
8044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return None, None
8054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Utility functions
8084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_localhost = None
8104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef localhost():
8114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Return the IP address of the magic hostname 'localhost'."""
8124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _localhost
8134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _localhost is None:
8144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _localhost = socket.gethostbyname('localhost')
8154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return _localhost
8164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_thishost = None
8184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef thishost():
8194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Return the IP address of the current host."""
8204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _thishost
8214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _thishost is None:
8224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _thishost = socket.gethostbyname(socket.gethostname())
8234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return _thishost
8244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_ftperrors = None
8264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef ftperrors():
8274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Return the set of errors raised by the FTP class."""
8284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _ftperrors
8294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _ftperrors is None:
8304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import ftplib
8314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _ftperrors = ftplib.all_errors
8324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return _ftperrors
8334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_noheaders = None
8354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef noheaders():
8364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Return an empty mimetools.Message object."""
8374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _noheaders
8384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _noheaders is None:
8394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import mimetools
8404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
8414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from cStringIO import StringIO
8424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ImportError:
8434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            from StringIO import StringIO
8444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _noheaders = mimetools.Message(StringIO(), 0)
8454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _noheaders.fp.close()   # Recycle file descriptor
8464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return _noheaders
8474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Utility classes
8504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass ftpwrapper:
8524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Class used by open_ftp() for cache of open FTP connections."""
8534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, user, passwd, host, port, dirs,
8554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
8564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 persistent=True):
8574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.user = user
8584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.passwd = passwd
8594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.host = host
8604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.port = port
8614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.dirs = dirs
8624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.timeout = timeout
8634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.refcount = 0
8644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.keepalive = persistent
8654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.init()
8664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def init(self):
8684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import ftplib
8694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.busy = 0
8704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.ftp = ftplib.FTP()
8714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.ftp.connect(self.host, self.port, self.timeout)
8724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.ftp.login(self.user, self.passwd)
8734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for dir in self.dirs:
8744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.ftp.cwd(dir)
8754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def retrfile(self, file, type):
8774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import ftplib
8784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.endtransfer()
8794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
8804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else: cmd = 'TYPE ' + type; isdir = 0
8814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
8824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.ftp.voidcmd(cmd)
8834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ftplib.all_errors:
8844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.init()
8854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.ftp.voidcmd(cmd)
8864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        conn = None
8874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if file and not isdir:
8884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Try to retrieve as a file
8894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            try:
8904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                cmd = 'RETR ' + file
8914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                conn, retrlen = self.ftp.ntransfercmd(cmd)
8924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            except ftplib.error_perm, reason:
8934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if str(reason)[:3] != '550':
8944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
8954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not conn:
8964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Set transfer mode to ASCII!
8974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.ftp.voidcmd('TYPE A')
8984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Try a directory listing. Verify that directory exists.
8994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if file:
9004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                pwd = self.ftp.pwd()
9014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                try:
9024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    try:
9034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        self.ftp.cwd(file)
9044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    except ftplib.error_perm, reason:
9054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
9064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                finally:
9074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    self.ftp.cwd(pwd)
9084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                cmd = 'LIST ' + file
9094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
9104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                cmd = 'LIST'
9114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            conn, retrlen = self.ftp.ntransfercmd(cmd)
9124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.busy = 1
9134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
9144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.refcount += 1
9154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        conn.close()
9164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Pass back both a suitably decorated object and a retrieval length
9174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return (ftpobj, retrlen)
9184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def endtransfer(self):
9204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not self.busy:
9214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return
9224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.busy = 0
9234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
9244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.ftp.voidresp()
9254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ftperrors():
9264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            pass
9274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def close(self):
9294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.keepalive = False
9304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.refcount <= 0:
9314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.real_close()
9324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def file_close(self):
9344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.endtransfer()
9354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.refcount -= 1
9364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.refcount <= 0 and not self.keepalive:
9374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.real_close()
9384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def real_close(self):
9404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.endtransfer()
9414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
9424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.ftp.close()
9434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ftperrors():
9444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            pass
9454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addbase:
9474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Base class for addinfo and addclosehook."""
9484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, fp):
9504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.fp = fp
9514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.read = self.fp.read
9524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.readline = self.fp.readline
9534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
9544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if hasattr(self.fp, "fileno"):
9554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.fileno = self.fp.fileno
9564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
9574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.fileno = lambda: None
9584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if hasattr(self.fp, "__iter__"):
9594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.__iter__ = self.fp.__iter__
9604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if hasattr(self.fp, "next"):
9614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                self.next = self.fp.next
9624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __repr__(self):
9644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
9654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                             id(self), self.fp)
9664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def close(self):
9684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.read = None
9694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.readline = None
9704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.readlines = None
9714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.fileno = None
9724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.fp: self.fp.close()
9734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.fp = None
9744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addclosehook(addbase):
9764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Class to add a close hook to an open file."""
9774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, fp, closehook, *hookargs):
9794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        addbase.__init__(self, fp)
9804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.closehook = closehook
9814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.hookargs = hookargs
9824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def close(self):
9844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.closehook:
9854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.closehook(*self.hookargs)
9864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.closehook = None
9874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self.hookargs = None
9884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        addbase.close(self)
9894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addinfo(addbase):
9914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """class to add an info() method to an open file."""
9924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, fp, headers):
9944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        addbase.__init__(self, fp)
9954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.headers = headers
9964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def info(self):
9984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.headers
9994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addinfourl(addbase):
10014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """class to add info() and geturl() methods to an open file."""
10024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, fp, headers, url, code=None):
10044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        addbase.__init__(self, fp)
10054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.headers = headers
10064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.url = url
10074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.code = code
10084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def info(self):
10104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.headers
10114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def getcode(self):
10134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.code
10144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def geturl(self):
10164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.url
10174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Utilities to parse URLs (most of these return None for missing parts):
10204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# unwrap('<URL:type://host/path>') --> 'type://host/path'
10214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splittype('type:opaquestring') --> 'type', 'opaquestring'
10224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splithost('//host[:port]/path') --> 'host[:port]', '/path'
10234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
10244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitpasswd('user:passwd') -> 'user', 'passwd'
10254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitport('host:port') --> 'host', 'port'
10264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitquery('/path?query') --> '/path', 'query'
10274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splittag('/path#tag') --> '/path', 'tag'
10284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitattr('/path;attr1=value1;attr2=value2;...') ->
10294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao#   '/path', ['attr1=value1', 'attr2=value2', ...]
10304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitvalue('attr=value') --> 'attr', 'value'
10314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# unquote('abc%20def') -> 'abc def'
10324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# quote('abc def') -> 'abc%20def')
10334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaotry:
10354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    unicode
10364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoexcept NameError:
10374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _is_unicode(x):
10384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return 0
10394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelse:
10404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _is_unicode(x):
10414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return isinstance(x, unicode)
10424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef toBytes(url):
10444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """toBytes(u"URL") --> 'URL'."""
10454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Most URL schemes require ASCII. If that changes, the conversion
10464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # can be relaxed
10474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _is_unicode(url):
10484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
10494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            url = url.encode("ASCII")
10504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except UnicodeError:
10514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise UnicodeError("URL " + repr(url) +
10524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                               " contains non-ASCII characters")
10534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return url
10544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef unwrap(url):
10564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
10574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    url = url.strip()
10584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if url[:1] == '<' and url[-1:] == '>':
10594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        url = url[1:-1].strip()
10604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if url[:4] == 'URL:': url = url[4:].strip()
10614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return url
10624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_typeprog = None
10644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splittype(url):
10654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
10664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _typeprog
10674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _typeprog is None:
10684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
10694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _typeprog = re.compile('^([^/:]+):')
10704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _typeprog.match(url)
10724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match:
10734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        scheme = match.group(1)
10744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return scheme.lower(), url[len(scheme) + 1:]
10754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return None, url
10764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_hostprog = None
10784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splithost(url):
10794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
10804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _hostprog
10814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _hostprog is None:
10824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
10834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _hostprog = re.compile('^//([^/?]*)(.*)$')
10844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _hostprog.match(url)
10864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match:
10874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host_port = match.group(1)
10884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        path = match.group(2)
10894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if path and not path.startswith('/'):
10904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            path = '/' + path
10914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return host_port, path
10924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return None, url
10934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_userprog = None
10954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splituser(host):
10964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
10974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _userprog
10984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _userprog is None:
10994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _userprog = re.compile('^(.*)@(.*)$')
11014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _userprog.match(host)
11034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match: return match.group(1, 2)
11044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return None, host
11054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_passwdprog = None
11074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitpasswd(user):
11084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
11094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _passwdprog
11104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _passwdprog is None:
11114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
11134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _passwdprog.match(user)
11154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match: return match.group(1, 2)
11164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return user, None
11174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splittag('/path#tag') --> '/path', 'tag'
11194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_portprog = None
11204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitport(host):
11214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splitport('host:port') --> 'host', 'port'."""
11224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _portprog
11234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _portprog is None:
11244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _portprog = re.compile('^(.*):([0-9]+)$')
11264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _portprog.match(host)
11284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match: return match.group(1, 2)
11294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return host, None
11304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_nportprog = None
11324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitnport(host, defport=-1):
11334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Split host and port, returning numeric port.
11344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Return given default port if no ':' found; defaults to -1.
11354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Return numerical port if a valid number are found after ':'.
11364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Return None if ':' but not a valid number."""
11374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _nportprog
11384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _nportprog is None:
11394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _nportprog = re.compile('^(.*):(.*)$')
11414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _nportprog.match(host)
11434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match:
11444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host, port = match.group(1, 2)
11454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
11464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not port: raise ValueError, "no digits"
11474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            nport = int(port)
11484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ValueError:
11494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            nport = None
11504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return host, nport
11514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return host, defport
11524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_queryprog = None
11544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitquery(url):
11554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splitquery('/path?query') --> '/path', 'query'."""
11564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _queryprog
11574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _queryprog is None:
11584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _queryprog = re.compile('^(.*)\?([^?]*)$')
11604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _queryprog.match(url)
11624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match: return match.group(1, 2)
11634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return url, None
11644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_tagprog = None
11664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splittag(url):
11674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splittag('/path#tag') --> '/path', 'tag'."""
11684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _tagprog
11694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _tagprog is None:
11704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _tagprog = re.compile('^(.*)#([^#]*)$')
11724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _tagprog.match(url)
11744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match: return match.group(1, 2)
11754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return url, None
11764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitattr(url):
11784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splitattr('/path;attr1=value1;attr2=value2;...') ->
11794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        '/path', ['attr1=value1', 'attr2=value2', ...]."""
11804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    words = url.split(';')
11814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return words[0], words[1:]
11824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_valueprog = None
11844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitvalue(attr):
11854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """splitvalue('attr=value') --> 'attr', 'value'."""
11864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global _valueprog
11874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _valueprog is None:
11884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
11894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _valueprog = re.compile('^([^=]*)=(.*)$')
11904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    match = _valueprog.match(attr)
11924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if match: return match.group(1, 2)
11934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return attr, None
11944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# urlparse contains a duplicate of this method to avoid a circular import.  If
11964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# you update this method, also update the copy in urlparse.  This code
11974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# duplication does not exist in Python3.
11984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_hexdig = '0123456789ABCDEFabcdef'
12004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_hextochr = dict((a + b, chr(int(a + b, 16)))
12014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 for a in _hexdig for b in _hexdig)
12024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_asciire = re.compile('([\x00-\x7f]+)')
12034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef unquote(s):
12054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """unquote('abc%20def') -> 'abc def'."""
12064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if _is_unicode(s):
12074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if '%' not in s:
12084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return s
12094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        bits = _asciire.split(s)
12104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        res = [bits[0]]
12114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        append = res.append
12124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for i in range(1, len(bits), 2):
12134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            append(unquote(str(bits[i])).decode('latin1'))
12144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            append(bits[i + 1])
12154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return ''.join(res)
12164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    bits = s.split('%')
12184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # fastpath
12194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if len(bits) == 1:
12204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return s
12214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    res = [bits[0]]
12224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    append = res.append
12234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for item in bits[1:]:
12244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
12254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            append(_hextochr[item[:2]])
12264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            append(item[2:])
12274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except KeyError:
12284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            append('%')
12294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            append(item)
12304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return ''.join(res)
12314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef unquote_plus(s):
12334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """unquote('%7e/abc+def') -> '~/abc def'"""
12344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    s = s.replace('+', ' ')
12354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return unquote(s)
12364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoalways_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
12384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao               'abcdefghijklmnopqrstuvwxyz'
12394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao               '0123456789' '_.-')
12404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_safe_map = {}
12414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofor i, c in zip(xrange(256), str(bytearray(xrange(256)))):
12424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
12434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_safe_quoters = {}
12444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef quote(s, safe='/'):
12464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """quote('abc def') -> 'abc%20def'
12474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Each part of a URL, e.g. the path info, the query, etc., has a
12494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    different set of reserved characters that must be quoted.
12504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
12524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    the following reserved characters.
12534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
12554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                  "$" | ","
12564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Each of these characters is reserved in some component of a URL,
12584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    but not necessarily in all of them.
12594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    By default, the quote function is intended for quoting the path
12614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    section of a URL.  Thus, it will not encode '/'.  This character
12624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    is reserved, but in typical usage the quote function is being
12634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    called on a path where the existing slash characters are used as
12644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    reserved characters.
12654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
12664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # fastpath
12674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not s:
12684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if s is None:
12694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise TypeError('None object cannot be quoted')
12704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return s
12714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    cachekey = (safe, always_safe)
12724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    try:
12734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        (quoter, safe) = _safe_quoters[cachekey]
12744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    except KeyError:
12754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        safe_map = _safe_map.copy()
12764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        safe_map.update([(c, c) for c in safe])
12774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        quoter = safe_map.__getitem__
12784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        safe = always_safe + safe
12794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _safe_quoters[cachekey] = (quoter, safe)
12804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not s.rstrip(safe):
12814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return s
12824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return ''.join(map(quoter, s))
12834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef quote_plus(s, safe=''):
12854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Quote the query fragment of a URL; replacing ' ' with '+'"""
12864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if ' ' in s:
12874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        s = quote(s, safe + ' ')
12884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return s.replace(' ', '+')
12894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return quote(s, safe)
12904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef urlencode(query, doseq=0):
12924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Encode a sequence of two-element tuples or dictionary into a URL query string.
12934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    If any values in the query arg are sequences and doseq is true, each
12954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    sequence element is converted to a separate parameter.
12964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    If the query arg is a sequence of two-element tuples, the order of the
12984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    parameters in the output will match the order of parameters in the
12994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    input.
13004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
13014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if hasattr(query,"items"):
13034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # mapping objects
13044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        query = query.items()
13054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    else:
13064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # it's a bother at times that strings and string-like objects are
13074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # sequences...
13084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
13094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # non-sequence items should not work with len()
13104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # non-empty strings will fail this
13114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if len(query) and not isinstance(query[0], tuple):
13124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise TypeError
13134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # zero-length sequences of all types will get here and succeed,
13144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # but that's a minor nit - since the original implementation
13154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # allowed empty dicts that type of behavior probably should be
13164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # preserved for consistency
13174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except TypeError:
13184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            ty,va,tb = sys.exc_info()
13194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise TypeError, "not a valid non-string sequence or mapping object", tb
13204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    l = []
13224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not doseq:
13234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # preserve old behavior
13244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for k, v in query:
13254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            k = quote_plus(str(k))
13264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            v = quote_plus(str(v))
13274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            l.append(k + '=' + v)
13284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    else:
13294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for k, v in query:
13304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            k = quote_plus(str(k))
13314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if isinstance(v, str):
13324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                v = quote_plus(v)
13334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                l.append(k + '=' + v)
13344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif _is_unicode(v):
13354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # is there a reasonable way to convert to ASCII?
13364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # encode generates a string, but "replace" or "ignore"
13374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # lose information and "strict" can raise UnicodeError
13384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                v = quote_plus(v.encode("ASCII","replace"))
13394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                l.append(k + '=' + v)
13404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
13414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                try:
13424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # is this a sufficient test for sequence-ness?
13434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    len(v)
13444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                except TypeError:
13454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # not a sequence
13464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    v = quote_plus(str(v))
13474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    l.append(k + '=' + v)
13484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
13494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # loop over the sequence
13504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    for elt in v:
13514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        l.append(k + '=' + quote_plus(str(elt)))
13524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return '&'.join(l)
13534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Proxy handling
13554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef getproxies_environment():
13564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Return a dictionary of scheme -> proxy server URL mappings.
13574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Scan the environment for variables named <scheme>_proxy;
13594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    this seems to be the standard convention.  If you need a
13604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    different way, you can pass a proxies dictionary to the
13614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    [Fancy]URLopener constructor.
13624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
13644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    proxies = {}
13654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for name, value in os.environ.items():
13664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name = name.lower()
13674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if value and name[-6:] == '_proxy':
13684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxies[name[:-6]] = value
13694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return proxies
13704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef proxy_bypass_environment(host):
13724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Test if proxies should not be used for a particular host.
13734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Checks the environment for a variable named no_proxy, which should
13754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    be a list of DNS suffixes separated by commas, or '*' for all hosts.
13764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
13774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
13784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # '*' is special case for always bypass
13794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if no_proxy == '*':
13804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return 1
13814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # strip port off host
13824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    hostonly, port = splitport(host)
13834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # check if the host ends with any of the DNS suffixes
13844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
13854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for name in no_proxy_list:
13864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if name and (hostonly.endswith(name) or host.endswith(name)):
13874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return 1
13884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # otherwise, don't bypass
13894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return 0
13904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoif sys.platform == 'darwin':
13934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    from _scproxy import _get_proxy_settings, _get_proxies
13944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def proxy_bypass_macosx_sysconf(host):
13964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
13974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Return True iff this host shouldn't be accessed using a proxy
13984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        This function uses the MacOSX framework SystemConfiguration
14004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        to fetch the proxy information.
14014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
14024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import re
14034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import socket
14044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        from fnmatch import fnmatch
14054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        hostonly, port = splitport(host)
14074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        def ip2num(ipAddr):
14094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            parts = ipAddr.split('.')
14104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            parts = map(int, parts)
14114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if len(parts) != 4:
14124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                parts = (parts + [0, 0, 0, 0])[:4]
14134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
14144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxy_settings = _get_proxy_settings()
14164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Check for simple host names:
14184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if '.' not in host:
14194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if proxy_settings['exclude_simple']:
14204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return True
14214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        hostIP = None
14234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for value in proxy_settings.get('exceptions', ()):
14254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Items in the list are strings like these: *.local, 169.254/16
14264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not value: continue
14274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
14294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if m is not None:
14304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if hostIP is None:
14314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    try:
14324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        hostIP = socket.gethostbyname(hostonly)
14334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        hostIP = ip2num(hostIP)
14344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    except socket.error:
14354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        continue
14364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                base = ip2num(m.group(1))
14384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                mask = m.group(2)
14394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if mask is None:
14404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    mask = 8 * (m.group(1).count('.') + 1)
14414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
14434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    mask = int(mask[1:])
14444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                mask = 32 - mask
14454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if (hostIP >> mask) == (base >> mask):
14474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return True
14484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif fnmatch(host, value):
14504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return True
14514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return False
14534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def getproxies_macosx_sysconf():
14554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
14564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        This function uses the MacOSX framework SystemConfiguration
14584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        to fetch the proxy information.
14594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
14604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return _get_proxies()
14614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def proxy_bypass(host):
14634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if getproxies_environment():
14644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return proxy_bypass_environment(host)
14654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
14664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return proxy_bypass_macosx_sysconf(host)
14674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def getproxies():
14694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return getproxies_environment() or getproxies_macosx_sysconf()
14704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelif os.name == 'nt':
14724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def getproxies_registry():
14734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
14744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Win32 uses the registry to store proxies.
14764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
14784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxies = {}
14794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
14804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            import _winreg
14814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ImportError:
14824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Std module, so should be around - but you never know!
14834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return proxies
14844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
14854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
14864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
14874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxyEnable = _winreg.QueryValueEx(internetSettings,
14884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                               'ProxyEnable')[0]
14894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if proxyEnable:
14904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # Returned as Unicode but problems if not converted to ASCII
14914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                proxyServer = str(_winreg.QueryValueEx(internetSettings,
14924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                                       'ProxyServer')[0])
14934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if '=' in proxyServer:
14944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # Per-protocol settings
14954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    for p in proxyServer.split(';'):
14964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        protocol, address = p.split('=', 1)
14974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        # See if address has a type:// prefix
14984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        import re
14994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        if not re.match('^([^/:]+)://', address):
15004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                            address = '%s://%s' % (protocol, address)
15014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        proxies[protocol] = address
15024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
15034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # Use one setting for all protocols
15044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if proxyServer[:5] == 'http:':
15054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        proxies['http'] = proxyServer
15064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    else:
15074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        proxies['http'] = 'http://%s' % proxyServer
15084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        proxies['https'] = 'https://%s' % proxyServer
15094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        proxies['ftp'] = 'ftp://%s' % proxyServer
15104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            internetSettings.Close()
15114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except (WindowsError, ValueError, TypeError):
15124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Either registry key not found etc, or the value in an
15134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # unexpected format.
15144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # proxies already set up to be empty so nothing to do
15154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            pass
15164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return proxies
15174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def getproxies():
15194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
15204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Returns settings gathered from the environment, if specified,
15224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        or the registry.
15234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
15254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return getproxies_environment() or getproxies_registry()
15264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def proxy_bypass_registry(host):
15284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
15294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            import _winreg
15304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            import re
15314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except ImportError:
15324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Std modules, so should be around - but you never know!
15334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return 0
15344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
15354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
15364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
15374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxyEnable = _winreg.QueryValueEx(internetSettings,
15384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                               'ProxyEnable')[0]
15394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
15404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                                                     'ProxyOverride')[0])
15414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # ^^^^ Returned as Unicode but problems if not converted to ASCII
15424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except WindowsError:
15434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return 0
15444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not proxyEnable or not proxyOverride:
15454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return 0
15464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # try to make a host list from name and IP address.
15474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        rawHost, port = splitport(host)
15484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        host = [rawHost]
15494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
15504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            addr = socket.gethostbyname(rawHost)
15514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if addr != rawHost:
15524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                host.append(addr)
15534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except socket.error:
15544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            pass
15554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
15564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fqdn = socket.getfqdn(rawHost)
15574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if fqdn != rawHost:
15584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                host.append(fqdn)
15594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except socket.error:
15604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            pass
15614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # make a check value list from the registry entry: replace the
15624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # '<local>' string by the localhost entry and the corresponding
15634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # canonical entry.
15644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        proxyOverride = proxyOverride.split(';')
15654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # now check if we match one of the registry values.
15664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for test in proxyOverride:
15674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if test == '<local>':
15684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if '.' not in rawHost:
15694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return 1
15704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            test = test.replace(".", r"\.")     # mask dots
15714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            test = test.replace("*", r".*")     # change glob sequence
15724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            test = test.replace("?", r".")      # change glob char
15734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for val in host:
15744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # print "%s <--> %s" %( test, val )
15754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if re.match(test, val, re.I):
15764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return 1
15774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return 0
15784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def proxy_bypass(host):
15804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
15814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Returns settings gathered from the environment, if specified,
15834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        or the registry.
15844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
15864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if getproxies_environment():
15874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return proxy_bypass_environment(host)
15884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
15894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return proxy_bypass_registry(host)
15904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelse:
15924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # By default use environment variables
15934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    getproxies = getproxies_environment
15944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    proxy_bypass = proxy_bypass_environment
15954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Test and time quote() and unquote()
15974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef test1():
15984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    s = ''
15994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for i in range(256): s = s + chr(i)
16004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    s = s*4
16014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    t0 = time.time()
16024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    qs = quote(s)
16034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    uqs = unquote(qs)
16044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    t1 = time.time()
16054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if uqs != s:
16064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        print 'Wrong!'
16074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    print repr(s)
16084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    print repr(qs)
16094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    print repr(uqs)
16104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    print round(t1 - t0, 3), 'sec'
16114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef reporthook(blocknum, blocksize, totalsize):
16144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Report during remote transfers
16154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    print "Block number: %d, Block size: %d, Total size: %d" % (
16164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        blocknum, blocksize, totalsize)
1617