10a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"""Open an arbitrary URL.
20a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
30a8c90248264a8b26970b4473770bcc3df8515fJosh GaoSee the following document for more info on URLs:
40a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"Names and Addresses, URIs, URLs, URNs, URCs", at
50a8c90248264a8b26970b4473770bcc3df8515fJosh Gaohttp://www.w3.org/pub/WWW/Addressing/Overview.html
60a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
70a8c90248264a8b26970b4473770bcc3df8515fJosh GaoSee also the HTTP spec (from which the error codes are derived):
80a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"HTTP - Hypertext Transfer Protocol", at
90a8c90248264a8b26970b4473770bcc3df8515fJosh Gaohttp://www.w3.org/pub/WWW/Protocols/
100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
110a8c90248264a8b26970b4473770bcc3df8515fJosh GaoRelated standards and specs:
120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao- RFC1808: the "relative URL" spec. (authoritative status)
130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao- RFC1738 - the "URL standard". (authoritative status)
140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao- RFC1630 - the "URI spec". (informational status)
150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
160a8c90248264a8b26970b4473770bcc3df8515fJosh GaoThe object returned by URLopener().open(file) will differ per
170a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoprotocol.  All you know is that is has methods read(), readline(),
180a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoreadlines(), fileno(), close() and info().  The read*(), fileno()
190a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoand close() methods work like those of open files.
200a8c90248264a8b26970b4473770bcc3df8515fJosh GaoThe info() method returns a mimetools.Message object which can be
210a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoused to query various info about the object, if available.
220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao(mimetools.Message objects are queried with the getheader() method.)
230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"""
240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
250a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport string
260a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport socket
270a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport os
280a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport time
290a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport sys
300a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport base64
310a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport re
320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
330a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom urlparse import urljoin as basejoin
340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           "urlencode", "url2pathname", "pathname2url", "splittag",
380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           "splitnport", "splitquery", "splitattr", "splitvalue",
410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           "getproxies"]
420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao__version__ = '1.17'    # XXX This version is not always updated :-(
440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
450a8c90248264a8b26970b4473770bcc3df8515fJosh GaoMAXFTPCACHE = 10        # Trim the ftp cache beyond this size
460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Helper for non-unix systems
480a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoif os.name == 'nt':
490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    from nturl2path import url2pathname, pathname2url
500a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelif os.name == 'riscos':
510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    from rourl2path import url2pathname, pathname2url
520a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse:
530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def url2pathname(pathname):
540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """OS-specific conversion from a relative URL of the 'file' scheme
550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        to a file system path; not recommended for general use."""
560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return unquote(pathname)
570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def pathname2url(pathname):
590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """OS-specific conversion from a file system path to a relative URL
600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        of the 'file' scheme; not recommended for general use."""
610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return quote(pathname)
620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# This really consists of two pieces:
640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# (1) a class which handles opening of all sorts of URLs
650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     (plus assorted utilities etc.)
660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# (2) a set of functions for parsing URLs
670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# XXX Should these be separated out into different modules?
680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Shortcut for basic usage
710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_urlopener = None
720a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlopen(url, data=None, proxies=None):
730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Create a file-like object for the specified URL to read from."""
740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    from warnings import warnpy3k
750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao             "favor of urllib2.urlopen()", stacklevel=2)
770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _urlopener
790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if proxies is not None:
800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        opener = FancyURLopener(proxies=proxies)
810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    elif not _urlopener:
820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        opener = FancyURLopener()
830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _urlopener = opener
840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        opener = _urlopener
860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if data is None:
870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return opener.open(url)
880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return opener.open(url, data)
900a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlretrieve(url, filename=None, reporthook=None, data=None):
910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _urlopener
920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if not _urlopener:
930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _urlopener = FancyURLopener()
940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return _urlopener.retrieve(url, filename, reporthook, data)
950a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlcleanup():
960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _urlopener:
970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _urlopener.cleanup()
980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _safe_quoters.clear()
990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ftpcache.clear()
1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# check for SSL
1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry:
1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    import ssl
1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept:
1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _have_ssl = False
1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse:
1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _have_ssl = True
1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# exception raised when downloaded size does not match content-length
1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ContentTooShortError(IOError):
1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, message, content):
1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        IOError.__init__(self, message)
1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.content = content
1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoftpcache = {}
1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass URLopener:
1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Class to open URLs.
1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    This is a class rather than just a subroutine because we may need
1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    more than one set of global protocol-specific options.
1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Note -- this is a base class for those who don't want the
1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    automatic handling of errors type 302 (relocated) and 401
1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    (authorization needed)."""
1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    __tempfiles = None
1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    version = "Python-urllib/%s" % __version__
1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Constructor
1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, proxies=None, **x509):
1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if proxies is None:
1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxies = getproxies()
1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.proxies = proxies
1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.key_file = x509.get('key_file')
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.cert_file = x509.get('cert_file')
1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.addheaders = [('User-Agent', self.version)]
1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.__tempfiles = []
1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.__unlink = os.unlink # See cleanup()
1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.tempcache = None
1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Undocumented feature: if you assign {} to tempcache,
1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # it is used to cache files retrieved with
1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # self.retrieve().  This is not enabled by default
1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # since it does not work for changing documents (and I
1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # haven't got the logic to check expiration headers
1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # yet).
1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.ftpcache = ftpcache
1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Undocumented feature: you can use a different
1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # ftp cache by assigning to the .ftpcache member;
1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # in case you want logically independent URL openers
1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # XXX This is not threadsafe.  Bah.
1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __del__(self):
1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.close()
1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.cleanup()
1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def cleanup(self):
1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # This code sometimes runs when the rest of this module
1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # has already been deleted, so it can't use any globals
1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # or import anything.
1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.__tempfiles:
1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for file in self.__tempfiles:
1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.__unlink(file)
1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except OSError:
1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    pass
1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            del self.__tempfiles[:]
1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.tempcache:
1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.tempcache.clear()
1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def addheader(self, *args):
1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Add a header to be used by the HTTP interface only
1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        e.g. u.addheader('Accept', 'sound/basic')"""
1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.addheaders.append(args)
1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # External interface
1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open(self, fullurl, data=None):
1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Use URLopener().open(file) instead of open(file, 'r')."""
1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fullurl = unwrap(toBytes(fullurl))
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # percent encode url, fixing lame server errors for e.g, like space
1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # within url paths.
1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.tempcache and fullurl in self.tempcache:
1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            filename, headers = self.tempcache[fullurl]
1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fp = open(filename, 'rb')
1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return addinfourl(fp, headers, fullurl)
1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        urltype, url = splittype(fullurl)
1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not urltype:
1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            urltype = 'file'
1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if urltype in self.proxies:
1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy = self.proxies[urltype]
1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            urltype, proxyhost = splittype(proxy)
1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            host, selector = splithost(proxyhost)
1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            url = (host, fullurl) # Signal special case to open_*()
1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy = None
1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = 'open_' + urltype
1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.type = urltype
2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = name.replace('-', '_')
2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not hasattr(self, name):
2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if proxy:
2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return self.open_unknown_proxy(proxy, fullurl, data)
2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return self.open_unknown(fullurl, data)
2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if data is None:
2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return getattr(self, name)(url)
2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return getattr(self, name)(url, data)
2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except socket.error, msg:
2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('socket error', msg), sys.exc_info()[2]
2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_unknown(self, fullurl, data=None):
2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Overridable interface to open unknown URL type."""
2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        type, url = splittype(fullurl)
2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise IOError, ('url error', 'unknown url type', type)
2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_unknown_proxy(self, proxy, fullurl, data=None):
2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Overridable interface to open unknown URL type."""
2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        type, url = splittype(fullurl)
2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # External interface
2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def retrieve(self, url, filename=None, reporthook=None, data=None):
2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """retrieve(url) returns (filename, headers) for a local object
2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        or (tempfilename, headers) for a remote object."""
2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        url = unwrap(toBytes(url))
2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.tempcache and url in self.tempcache:
2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.tempcache[url]
2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        type, url1 = splittype(url)
2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if filename is None and (not type or type == 'file'):
2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                fp = self.open_local_file(url1)
2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                hdrs = fp.info()
2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                fp.close()
2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return url2pathname(splithost(url1)[1]), hdrs
2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except IOError:
2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pass
2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fp = self.open(url, data)
2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            headers = fp.info()
2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if filename:
2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tfp = open(filename, 'wb')
2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                import tempfile
2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                garbage, path = splittype(url)
2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                garbage, path = splithost(path or "")
2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                path, garbage = splitquery(path or "")
2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                path, garbage = splitattr(path or "")
2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                suffix = os.path.splitext(path)[1]
2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                (fd, filename) = tempfile.mkstemp(suffix)
2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.__tempfiles.append(filename)
2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tfp = os.fdopen(fd, 'wb')
2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                result = filename, headers
2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.tempcache is not None:
2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.tempcache[url] = result
2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                bs = 1024*8
2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                size = -1
2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                read = 0
2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                blocknum = 0
2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if "content-length" in headers:
2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    size = int(headers["Content-Length"])
2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if reporthook:
2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    reporthook(blocknum, bs, size)
2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                while 1:
2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    block = fp.read(bs)
2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if block == "":
2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        break
2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    read += len(block)
2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    tfp.write(block)
2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    blocknum += 1
2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if reporthook:
2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        reporthook(blocknum, bs, size)
2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            finally:
2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tfp.close()
2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        finally:
2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fp.close()
2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # raise exception if actual size does not match content-length header
2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if size >= 0 and read < size:
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ContentTooShortError("retrieval incomplete: got only %i out "
2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                       "of %i bytes" % (read, size), result)
2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return result
2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Each method named open_<type> knows how to open that type of URL
2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_http(self, url, data=None):
2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Use HTTP protocol."""
2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import httplib
2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user_passwd = None
2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxy_passwd= None
2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if isinstance(url, str):
2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            host, selector = splithost(url)
2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if host:
2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                user_passwd, host = splituser(host)
2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                host = unquote(host)
3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            realhost = host
3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            host, selector = url
3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # check whether the proxy contains authorization information
3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy_passwd, host = splituser(host)
3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # now we proceed with the url we want to obtain
3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            urltype, rest = splittype(selector)
3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            url = rest
3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            user_passwd = None
3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if urltype.lower() != 'http':
3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                realhost = None
3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                realhost, rest = splithost(rest)
3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if realhost:
3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    user_passwd, realhost = splituser(realhost)
3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if user_passwd:
3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    selector = "%s://%s%s" % (urltype, realhost, rest)
3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if proxy_bypass(realhost):
3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    host = realhost
3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            #print "proxy via http:", host, selector
3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not host: raise IOError, ('http error', 'no host given')
3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if proxy_passwd:
3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy_passwd = unquote(proxy_passwd)
3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy_auth = base64.b64encode(proxy_passwd).strip()
3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy_auth = None
3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if user_passwd:
3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            user_passwd = unquote(user_passwd)
3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            auth = base64.b64encode(user_passwd).strip()
3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            auth = None
3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        h = httplib.HTTP(host)
3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is not None:
3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            h.putrequest('POST', selector)
3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            h.putheader('Content-Length', '%d' % len(data))
3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            h.putrequest('GET', selector)
3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if auth: h.putheader('Authorization', 'Basic %s' % auth)
3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if realhost: h.putheader('Host', realhost)
3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for args in self.addheaders: h.putheader(*args)
3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        h.endheaders(data)
3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        errcode, errmsg, headers = h.getreply()
3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fp = h.getfile()
3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if errcode == -1:
3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if fp: fp.close()
3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # something went wrong with the HTTP status line
3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('http protocol error', 0,
3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            'got a bad status line', None)
3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # According to RFC 2616, "2xx" code indicates that the client's
3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # request was successfully received, understood, and accepted.
3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if (200 <= errcode < 300):
3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return addinfourl(fp, headers, "http:" + url, errcode)
3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if data is None:
3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return self.http_error(url, fp, errcode, errmsg, headers)
3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return self.http_error(url, fp, errcode, errmsg, headers, data)
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Handle http errors.
3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        Derived class can override this, or provide specific handlers
3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        named http_error_DDD where DDD is the 3-digit error code."""
3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # First check if there's a specific handler for this error
3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = 'http_error_%d' % errcode
3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(self, name):
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            method = getattr(self, name)
3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if data is None:
3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                result = method(url, fp, errcode, errmsg, headers)
3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                result = method(url, fp, errcode, errmsg, headers, data)
3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if result: return result
3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.http_error_default(url, fp, errcode, errmsg, headers)
3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_default(self, url, fp, errcode, errmsg, headers):
3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Default error handler: close the connection and raise IOError."""
3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fp.close()
3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise IOError, ('http error', errcode, errmsg, headers)
3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _have_ssl:
3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def open_https(self, url, data=None):
3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            """Use HTTPS protocol."""
3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import httplib
3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            user_passwd = None
3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxy_passwd = None
3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if isinstance(url, str):
3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                host, selector = splithost(url)
3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if host:
3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    user_passwd, host = splituser(host)
3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    host = unquote(host)
3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                realhost = host
3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                host, selector = url
3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # here, we determine, whether the proxy contains authorization information
3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                proxy_passwd, host = splituser(host)
4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                urltype, rest = splittype(selector)
4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                url = rest
4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                user_passwd = None
4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if urltype.lower() != 'https':
4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    realhost = None
4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    realhost, rest = splithost(rest)
4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if realhost:
4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        user_passwd, realhost = splituser(realhost)
4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if user_passwd:
4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        selector = "%s://%s%s" % (urltype, realhost, rest)
4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                #print "proxy via https:", host, selector
4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not host: raise IOError, ('https error', 'no host given')
4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if proxy_passwd:
4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                proxy_passwd = unquote(proxy_passwd)
4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                proxy_auth = base64.b64encode(proxy_passwd).strip()
4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                proxy_auth = None
4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if user_passwd:
4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                user_passwd = unquote(user_passwd)
4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                auth = base64.b64encode(user_passwd).strip()
4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                auth = None
4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            h = httplib.HTTPS(host, 0,
4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                              key_file=self.key_file,
4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                              cert_file=self.cert_file)
4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if data is not None:
4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                h.putrequest('POST', selector)
4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                h.putheader('Content-Type',
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            'application/x-www-form-urlencoded')
4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                h.putheader('Content-Length', '%d' % len(data))
4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                h.putrequest('GET', selector)
4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if auth: h.putheader('Authorization', 'Basic %s' % auth)
4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if realhost: h.putheader('Host', realhost)
4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for args in self.addheaders: h.putheader(*args)
4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            h.endheaders(data)
4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            errcode, errmsg, headers = h.getreply()
4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fp = h.getfile()
4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if errcode == -1:
4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if fp: fp.close()
4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # something went wrong with the HTTP status line
4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise IOError, ('http protocol error', 0,
4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                'got a bad status line', None)
4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # According to RFC 2616, "2xx" code indicates that the client's
4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # request was successfully received, understood, and accepted.
4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if (200 <= errcode < 300):
4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return addinfourl(fp, headers, "https:" + url, errcode)
4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if data is None:
4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return self.http_error(url, fp, errcode, errmsg, headers)
4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return self.http_error(url, fp, errcode, errmsg, headers,
4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                           data)
4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_file(self, url):
4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Use local file or FTP depending on form of URL."""
4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not isinstance(url, str):
4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open_ftp(url)
4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open_local_file(url)
4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_local_file(self, url):
4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Use local file."""
4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import mimetypes, mimetools, email.utils
4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from cStringIO import StringIO
4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from StringIO import StringIO
4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, file = splithost(url)
4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        localname = url2pathname(file)
4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stats = os.stat(localname)
4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except OSError, e:
4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError(e.errno, e.strerror, e.filename)
4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        size = stats.st_size
4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        mtype = mimetypes.guess_type(url)[0]
4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        headers = mimetools.Message(StringIO(
4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (mtype or 'text/plain', size, modified)))
4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not host:
4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            urlfile = file
4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if file[:1] == '/':
4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                urlfile = 'file://' + file
4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif file[:2] == './':
4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return addinfourl(open(localname, 'rb'),
4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                              headers, urlfile)
4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, port = splitport(host)
4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not port \
4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           and socket.gethostbyname(host) in (localhost(), thishost()):
4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            urlfile = file
4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if file[:1] == '/':
4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                urlfile = 'file://' + file
4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return addinfourl(open(localname, 'rb'),
4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                              headers, urlfile)
5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise IOError, ('local file error', 'not on local host')
5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_ftp(self, url):
5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Use FTP protocol."""
5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not isinstance(url, str):
5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import mimetypes, mimetools
5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from cStringIO import StringIO
5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from StringIO import StringIO
5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, path = splithost(url)
5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not host: raise IOError, ('ftp error', 'no host given')
5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, port = splitport(host)
5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user, host = splituser(host)
5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if user: user, passwd = splitpasswd(user)
5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else: passwd = None
5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = unquote(host)
5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user = user or ''
5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        passwd = passwd or ''
5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = socket.gethostbyname(host)
5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not port:
5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import ftplib
5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            port = ftplib.FTP_PORT
5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            port = int(port)
5260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        path, attrs = splitattr(path)
5270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        path = unquote(path)
5280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        dirs = path.split('/')
5290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        dirs, file = dirs[:-1], dirs[-1]
5300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if dirs and not dirs[0]: dirs = dirs[1:]
5310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if dirs and not dirs[0]: dirs[0] = '/'
5320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        key = user, host, port, '/'.join(dirs)
5330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # XXX thread unsafe!
5340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(self.ftpcache) > MAXFTPCACHE:
5350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Prune the cache, rather arbitrarily
5360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for k in self.ftpcache.keys():
5370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if k != key:
5380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    v = self.ftpcache[k]
5390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    del self.ftpcache[k]
5400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    v.close()
5410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
5420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not key in self.ftpcache:
5430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.ftpcache[key] = \
5440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    ftpwrapper(user, passwd, host, port, dirs)
5450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not file: type = 'D'
5460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else: type = 'I'
5470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for attr in attrs:
5480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                attr, value = splitvalue(attr)
5490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if attr.lower() == 'type' and \
5500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
5510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    type = value.upper()
5520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
5530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            mtype = mimetypes.guess_type("ftp:" + url)[0]
5540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            headers = ""
5550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if mtype:
5560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                headers += "Content-Type: %s\n" % mtype
5570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if retrlen is not None and retrlen >= 0:
5580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                headers += "Content-Length: %d\n" % retrlen
5590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            headers = mimetools.Message(StringIO(headers))
5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return addinfourl(fp, headers, "ftp:" + url)
5610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ftperrors(), msg:
5620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('ftp error', msg), sys.exc_info()[2]
5630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open_data(self, url, data=None):
5650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Use "data" URL."""
5660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not isinstance(url, str):
5670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
5680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # ignore POSTed data
5690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        #
5700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # syntax of data URLs:
5710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
5720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # mediatype := [ type "/" subtype ] *( ";" parameter )
5730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # data      := *urlchar
5740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # parameter := attribute "=" value
5750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import mimetools
5760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
5770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from cStringIO import StringIO
5780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
5790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from StringIO import StringIO
5800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
5810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            [type, data] = url.split(',', 1)
5820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ValueError:
5830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError, ('data error', 'bad data URL')
5840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not type:
5850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = 'text/plain;charset=US-ASCII'
5860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        semi = type.rfind(';')
5870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if semi >= 0 and '=' not in type[semi:]:
5880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            encoding = type[semi+1:]
5890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = type[:semi]
5900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
5910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            encoding = ''
5920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg = []
5930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
5940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                            time.gmtime(time.time())))
5950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg.append('Content-type: %s' % type)
5960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if encoding == 'base64':
5970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            data = base64.decodestring(data)
5980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            data = unquote(data)
6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg.append('Content-Length: %d' % len(data))
6010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg.append('')
6020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg.append(data)
6030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        msg = '\n'.join(msg)
6040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        f = StringIO(msg)
6050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        headers = mimetools.Message(f, 0)
6060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        #f.fileno = None     # needed for addinfourl
6070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return addinfourl(f, headers, url)
6080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6100a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass FancyURLopener(URLopener):
6110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Derived class with handlers for errors we can handle (perhaps)."""
6120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, *args, **kwargs):
6140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        URLopener.__init__(self, *args, **kwargs)
6150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.auth_cache = {}
6160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.tries = 0
6170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.maxtries = 10
6180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_default(self, url, fp, errcode, errmsg, headers):
6200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Default error handling -- don't raise an exception."""
6210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return addinfourl(fp, headers, "http:" + url, errcode)
6220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
6240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Error 302 -- relocated (temporarily)."""
6250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.tries += 1
6260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.maxtries and self.tries >= self.maxtries:
6270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if hasattr(self, "http_error_500"):
6280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                meth = self.http_error_500
6290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
6300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                meth = self.http_error_default
6310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.tries = 0
6320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return meth(url, fp, 500,
6330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        "Internal Server Error: Redirect Recursion", headers)
6340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
6350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                        data)
6360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.tries = 0
6370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return result
6380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
6400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if 'location' in headers:
6410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            newurl = headers['location']
6420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif 'uri' in headers:
6430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            newurl = headers['uri']
6440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
6450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
6460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fp.close()
6470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # In case the server sent a relative URL, join with original:
6480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        newurl = basejoin(self.type + ":" + url, newurl)
6490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # For security reasons we do not allow redirects to protocols
6510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # other than HTTP, HTTPS or FTP.
6520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        newurl_lower = newurl.lower()
6530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not (newurl_lower.startswith('http://') or
6540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                newurl_lower.startswith('https://') or
6550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                newurl_lower.startswith('ftp://')):
6560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError('redirect error', errcode,
6570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                          errmsg + " - Redirection to url '%s' is not allowed" %
6580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                          newurl,
6590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                          headers)
6600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.open(newurl)
6620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
6640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Error 301 -- also relocated (permanently)."""
6650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
6660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
6680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Error 303 -- also relocated (essentially identical to 302)."""
6690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
6700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
6720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Error 307 -- relocated, but turn POST into error."""
6730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
6740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
6750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
6760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.http_error_default(url, fp, errcode, errmsg, headers)
6770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
6790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Error 401 -- authentication required.
6800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        This function supports Basic authentication only."""
6810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not 'www-authenticate' in headers:
6820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            URLopener.http_error_default(self, url, fp,
6830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                         errcode, errmsg, headers)
6840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        stuff = headers['www-authenticate']
6850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
6860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
6870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not match:
6880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            URLopener.http_error_default(self, url, fp,
6890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                         errcode, errmsg, headers)
6900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        scheme, realm = match.groups()
6910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if scheme.lower() != 'basic':
6920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            URLopener.http_error_default(self, url, fp,
6930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                         errcode, errmsg, headers)
6940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = 'retry_' + self.type + '_basic_auth'
6950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
6960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return getattr(self,name)(url, realm)
6970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
6980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return getattr(self,name)(url, realm, data)
6990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Error 407 -- proxy authentication required.
7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        This function supports Basic authentication only."""
7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not 'proxy-authenticate' in headers:
7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            URLopener.http_error_default(self, url, fp,
7050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                         errcode, errmsg, headers)
7060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        stuff = headers['proxy-authenticate']
7070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
7080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
7090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not match:
7100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            URLopener.http_error_default(self, url, fp,
7110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                         errcode, errmsg, headers)
7120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        scheme, realm = match.groups()
7130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if scheme.lower() != 'basic':
7140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            URLopener.http_error_default(self, url, fp,
7150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                         errcode, errmsg, headers)
7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = 'retry_proxy_' + self.type + '_basic_auth'
7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
7180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return getattr(self,name)(url, realm)
7190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
7200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return getattr(self,name)(url, realm, data)
7210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def retry_proxy_http_basic_auth(self, url, realm, data=None):
7230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, selector = splithost(url)
7240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        newurl = 'http://' + host + selector
7250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxy = self.proxies['http']
7260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        urltype, proxyhost = splittype(proxy)
7270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyhost, proxyselector = splithost(proxyhost)
7280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        i = proxyhost.find('@') + 1
7290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyhost = proxyhost[i:]
7300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user, passwd = self.get_user_passwd(proxyhost, realm, i)
7310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not (user or passwd): return None
7320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
7330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.proxies['http'] = 'http://' + proxyhost + proxyselector
7340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
7350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl)
7360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
7370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl, data)
7380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def retry_proxy_https_basic_auth(self, url, realm, data=None):
7400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, selector = splithost(url)
7410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        newurl = 'https://' + host + selector
7420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxy = self.proxies['https']
7430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        urltype, proxyhost = splittype(proxy)
7440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyhost, proxyselector = splithost(proxyhost)
7450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        i = proxyhost.find('@') + 1
7460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyhost = proxyhost[i:]
7470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user, passwd = self.get_user_passwd(proxyhost, realm, i)
7480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not (user or passwd): return None
7490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
7500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.proxies['https'] = 'https://' + proxyhost + proxyselector
7510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
7520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl)
7530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
7540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl, data)
7550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def retry_http_basic_auth(self, url, realm, data=None):
7570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, selector = splithost(url)
7580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        i = host.find('@') + 1
7590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = host[i:]
7600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user, passwd = self.get_user_passwd(host, realm, i)
7610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not (user or passwd): return None
7620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
7630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        newurl = 'http://' + host + selector
7640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
7650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl)
7660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
7670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl, data)
7680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def retry_https_basic_auth(self, url, realm, data=None):
7700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, selector = splithost(url)
7710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        i = host.find('@') + 1
7720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = host[i:]
7730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user, passwd = self.get_user_passwd(host, realm, i)
7740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not (user or passwd): return None
7750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
7760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        newurl = 'https://' + host + selector
7770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if data is None:
7780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl)
7790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
7800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.open(newurl, data)
7810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def get_user_passwd(self, host, realm, clear_cache=0):
7830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        key = realm + '@' + host.lower()
7840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if key in self.auth_cache:
7850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if clear_cache:
7860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                del self.auth_cache[key]
7870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
7880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return self.auth_cache[key]
7890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        user, passwd = self.prompt_user_passwd(host, realm)
7900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if user or passwd: self.auth_cache[key] = (user, passwd)
7910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return user, passwd
7920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def prompt_user_passwd(self, host, realm):
7940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Override this in a GUI environment!"""
7950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import getpass
7960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
7970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            user = raw_input("Enter username for %s at %s: " % (realm,
7980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                                                host))
7990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
8000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                (user, realm, host))
8010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return user, passwd
8020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except KeyboardInterrupt:
8030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            print
8040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return None, None
8050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Utility functions
8080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_localhost = None
8100a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef localhost():
8110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Return the IP address of the magic hostname 'localhost'."""
8120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _localhost
8130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _localhost is None:
8140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _localhost = socket.gethostbyname('localhost')
8150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return _localhost
8160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_thishost = None
8180a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef thishost():
8190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Return the IP address of the current host."""
8200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _thishost
8210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _thishost is None:
8220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _thishost = socket.gethostbyname(socket.gethostname())
8230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return _thishost
8240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ftperrors = None
8260a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef ftperrors():
8270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Return the set of errors raised by the FTP class."""
8280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _ftperrors
8290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _ftperrors is None:
8300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import ftplib
8310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _ftperrors = ftplib.all_errors
8320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return _ftperrors
8330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_noheaders = None
8350a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef noheaders():
8360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Return an empty mimetools.Message object."""
8370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _noheaders
8380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _noheaders is None:
8390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import mimetools
8400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
8410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from cStringIO import StringIO
8420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
8430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            from StringIO import StringIO
8440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _noheaders = mimetools.Message(StringIO(), 0)
8450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _noheaders.fp.close()   # Recycle file descriptor
8460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return _noheaders
8470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Utility classes
8500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8510a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ftpwrapper:
8520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Class used by open_ftp() for cache of open FTP connections."""
8530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, user, passwd, host, port, dirs,
8550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
8560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                 persistent=True):
8570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.user = user
8580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.passwd = passwd
8590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.host = host
8600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.port = port
8610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.dirs = dirs
8620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.timeout = timeout
8630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.refcount = 0
8640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.keepalive = persistent
8650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.init()
8660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def init(self):
8680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import ftplib
8690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.busy = 0
8700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.ftp = ftplib.FTP()
8710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.ftp.connect(self.host, self.port, self.timeout)
8720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.ftp.login(self.user, self.passwd)
8730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for dir in self.dirs:
8740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ftp.cwd(dir)
8750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def retrfile(self, file, type):
8770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import ftplib
8780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.endtransfer()
8790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
8800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else: cmd = 'TYPE ' + type; isdir = 0
8810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
8820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ftp.voidcmd(cmd)
8830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ftplib.all_errors:
8840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.init()
8850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ftp.voidcmd(cmd)
8860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        conn = None
8870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if file and not isdir:
8880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Try to retrieve as a file
8890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
8900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                cmd = 'RETR ' + file
8910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                conn, retrlen = self.ftp.ntransfercmd(cmd)
8920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except ftplib.error_perm, reason:
8930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if str(reason)[:3] != '550':
8940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
8950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not conn:
8960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Set transfer mode to ASCII!
8970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ftp.voidcmd('TYPE A')
8980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Try a directory listing. Verify that directory exists.
8990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if file:
9000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pwd = self.ftp.pwd()
9010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
9020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    try:
9030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        self.ftp.cwd(file)
9040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    except ftplib.error_perm, reason:
9050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
9060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                finally:
9070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.ftp.cwd(pwd)
9080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                cmd = 'LIST ' + file
9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                cmd = 'LIST'
9110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            conn, retrlen = self.ftp.ntransfercmd(cmd)
9120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.busy = 1
9130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
9140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.refcount += 1
9150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        conn.close()
9160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Pass back both a suitably decorated object and a retrieval length
9170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return (ftpobj, retrlen)
9180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def endtransfer(self):
9200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not self.busy:
9210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
9220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.busy = 0
9230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
9240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ftp.voidresp()
9250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ftperrors():
9260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
9270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
9290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.keepalive = False
9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.refcount <= 0:
9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.real_close()
9320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def file_close(self):
9340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.endtransfer()
9350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.refcount -= 1
9360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.refcount <= 0 and not self.keepalive:
9370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.real_close()
9380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def real_close(self):
9400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.endtransfer()
9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ftp.close()
9430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ftperrors():
9440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
9450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9460a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addbase:
9470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Base class for addinfo and addclosehook."""
9480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, fp):
9500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fp = fp
9510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.read = self.fp.read
9520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.readline = self.fp.readline
9530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
9540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(self.fp, "fileno"):
9550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.fileno = self.fp.fileno
9560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
9570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.fileno = lambda: None
9580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(self.fp, "__iter__"):
9590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.__iter__ = self.fp.__iter__
9600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if hasattr(self.fp, "next"):
9610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.next = self.fp.next
9620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __repr__(self):
9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                             id(self), self.fp)
9660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
9680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.read = None
9690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.readline = None
9700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.readlines = None
9710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fileno = None
9720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.fp: self.fp.close()
9730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fp = None
9740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9750a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addclosehook(addbase):
9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Class to add a close hook to an open file."""
9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, fp, closehook, *hookargs):
9790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        addbase.__init__(self, fp)
9800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.closehook = closehook
9810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.hookargs = hookargs
9820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
9840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.closehook:
9850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.closehook(*self.hookargs)
9860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.closehook = None
9870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.hookargs = None
9880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        addbase.close(self)
9890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9900a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addinfo(addbase):
9910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """class to add an info() method to an open file."""
9920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, fp, headers):
9940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        addbase.__init__(self, fp)
9950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.headers = headers
9960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def info(self):
9980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.headers
9990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10000a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addinfourl(addbase):
10010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """class to add info() and geturl() methods to an open file."""
10020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, fp, headers, url, code=None):
10040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        addbase.__init__(self, fp)
10050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.headers = headers
10060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.url = url
10070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.code = code
10080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def info(self):
10100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.headers
10110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getcode(self):
10130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.code
10140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def geturl(self):
10160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.url
10170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Utilities to parse URLs (most of these return None for missing parts):
10200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# unwrap('<URL:type://host/path>') --> 'type://host/path'
10210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splittype('type:opaquestring') --> 'type', 'opaquestring'
10220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splithost('//host[:port]/path') --> 'host[:port]', '/path'
10230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
10240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitpasswd('user:passwd') -> 'user', 'passwd'
10250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitport('host:port') --> 'host', 'port'
10260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitquery('/path?query') --> '/path', 'query'
10270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splittag('/path#tag') --> '/path', 'tag'
10280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitattr('/path;attr1=value1;attr2=value2;...') ->
10290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#   '/path', ['attr1=value1', 'attr2=value2', ...]
10300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitvalue('attr=value') --> 'attr', 'value'
10310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# unquote('abc%20def') -> 'abc def'
10320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# quote('abc def') -> 'abc%20def')
10330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10340a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry:
10350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    unicode
10360a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept NameError:
10370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _is_unicode(x):
10380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return 0
10390a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse:
10400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _is_unicode(x):
10410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return isinstance(x, unicode)
10420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10430a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef toBytes(url):
10440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """toBytes(u"URL") --> 'URL'."""
10450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Most URL schemes require ASCII. If that changes, the conversion
10460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # can be relaxed
10470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _is_unicode(url):
10480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
10490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            url = url.encode("ASCII")
10500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except UnicodeError:
10510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise UnicodeError("URL " + repr(url) +
10520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                               " contains non-ASCII characters")
10530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return url
10540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10550a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef unwrap(url):
10560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
10570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    url = url.strip()
10580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if url[:1] == '<' and url[-1:] == '>':
10590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        url = url[1:-1].strip()
10600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if url[:4] == 'URL:': url = url[4:].strip()
10610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return url
10620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_typeprog = None
10640a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splittype(url):
10650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
10660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _typeprog
10670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _typeprog is None:
10680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
10690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _typeprog = re.compile('^([^/:]+):')
10700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _typeprog.match(url)
10720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match:
10730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        scheme = match.group(1)
10740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return scheme.lower(), url[len(scheme) + 1:]
10750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return None, url
10760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_hostprog = None
10780a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splithost(url):
10790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
10800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _hostprog
10810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _hostprog is None:
10820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
10830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _hostprog = re.compile('^//([^/?]*)(.*)$')
10840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _hostprog.match(url)
10860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match:
10870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host_port = match.group(1)
10880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        path = match.group(2)
10890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if path and not path.startswith('/'):
10900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            path = '/' + path
10910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return host_port, path
10920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return None, url
10930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_userprog = None
10950a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splituser(host):
10960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
10970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _userprog
10980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _userprog is None:
10990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _userprog = re.compile('^(.*)@(.*)$')
11010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _userprog.match(host)
11030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match: return match.group(1, 2)
11040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return None, host
11050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_passwdprog = None
11070a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitpasswd(user):
11080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
11090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _passwdprog
11100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _passwdprog is None:
11110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
11130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _passwdprog.match(user)
11150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match: return match.group(1, 2)
11160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return user, None
11170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splittag('/path#tag') --> '/path', 'tag'
11190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_portprog = None
11200a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitport(host):
11210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splitport('host:port') --> 'host', 'port'."""
11220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _portprog
11230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _portprog is None:
11240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _portprog = re.compile('^(.*):([0-9]+)$')
11260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _portprog.match(host)
11280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match: return match.group(1, 2)
11290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return host, None
11300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_nportprog = None
11320a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitnport(host, defport=-1):
11330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Split host and port, returning numeric port.
11340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Return given default port if no ':' found; defaults to -1.
11350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Return numerical port if a valid number are found after ':'.
11360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Return None if ':' but not a valid number."""
11370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _nportprog
11380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _nportprog is None:
11390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _nportprog = re.compile('^(.*):(.*)$')
11410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _nportprog.match(host)
11430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match:
11440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host, port = match.group(1, 2)
11450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
11460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not port: raise ValueError, "no digits"
11470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            nport = int(port)
11480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ValueError:
11490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            nport = None
11500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return host, nport
11510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return host, defport
11520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_queryprog = None
11540a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitquery(url):
11550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splitquery('/path?query') --> '/path', 'query'."""
11560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _queryprog
11570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _queryprog is None:
11580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _queryprog = re.compile('^(.*)\?([^?]*)$')
11600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _queryprog.match(url)
11620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match: return match.group(1, 2)
11630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return url, None
11640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_tagprog = None
11660a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splittag(url):
11670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splittag('/path#tag') --> '/path', 'tag'."""
11680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _tagprog
11690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _tagprog is None:
11700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _tagprog = re.compile('^(.*)#([^#]*)$')
11720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _tagprog.match(url)
11740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match: return match.group(1, 2)
11750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return url, None
11760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11770a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitattr(url):
11780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splitattr('/path;attr1=value1;attr2=value2;...') ->
11790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        '/path', ['attr1=value1', 'attr2=value2', ...]."""
11800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    words = url.split(';')
11810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return words[0], words[1:]
11820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_valueprog = None
11840a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitvalue(attr):
11850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """splitvalue('attr=value') --> 'attr', 'value'."""
11860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    global _valueprog
11870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _valueprog is None:
11880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
11890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _valueprog = re.compile('^([^=]*)=(.*)$')
11900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    match = _valueprog.match(attr)
11920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if match: return match.group(1, 2)
11930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return attr, None
11940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# urlparse contains a duplicate of this method to avoid a circular import.  If
11960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# you update this method, also update the copy in urlparse.  This code
11970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# duplication does not exist in Python3.
11980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_hexdig = '0123456789ABCDEFabcdef'
12000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_hextochr = dict((a + b, chr(int(a + b, 16)))
12010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                 for a in _hexdig for b in _hexdig)
12020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_asciire = re.compile('([\x00-\x7f]+)')
12030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12040a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef unquote(s):
12050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """unquote('abc%20def') -> 'abc def'."""
12060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _is_unicode(s):
12070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if '%' not in s:
12080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return s
12090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        bits = _asciire.split(s)
12100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        res = [bits[0]]
12110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        append = res.append
12120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for i in range(1, len(bits), 2):
12130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            append(unquote(str(bits[i])).decode('latin1'))
12140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            append(bits[i + 1])
12150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return ''.join(res)
12160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    bits = s.split('%')
12180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # fastpath
12190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if len(bits) == 1:
12200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return s
12210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    res = [bits[0]]
12220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    append = res.append
12230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for item in bits[1:]:
12240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
12250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            append(_hextochr[item[:2]])
12260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            append(item[2:])
12270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except KeyError:
12280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            append('%')
12290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            append(item)
12300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return ''.join(res)
12310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12320a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef unquote_plus(s):
12330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """unquote('%7e/abc+def') -> '~/abc def'"""
12340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    s = s.replace('+', ' ')
12350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return unquote(s)
12360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12370a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoalways_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
12380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao               'abcdefghijklmnopqrstuvwxyz'
12390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao               '0123456789' '_.-')
12400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_safe_map = {}
12410a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofor i, c in zip(xrange(256), str(bytearray(xrange(256)))):
12420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
12430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_safe_quoters = {}
12440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12450a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef quote(s, safe='/'):
12460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """quote('abc def') -> 'abc%20def'
12470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Each part of a URL, e.g. the path info, the query, etc., has a
12490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    different set of reserved characters that must be quoted.
12500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
12520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    the following reserved characters.
12530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
12550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                  "$" | ","
12560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Each of these characters is reserved in some component of a URL,
12580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    but not necessarily in all of them.
12590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    By default, the quote function is intended for quoting the path
12610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    section of a URL.  Thus, it will not encode '/'.  This character
12620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    is reserved, but in typical usage the quote function is being
12630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    called on a path where the existing slash characters are used as
12640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    reserved characters.
12650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
12660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # fastpath
12670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if not s:
12680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if s is None:
12690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise TypeError('None object cannot be quoted')
12700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return s
12710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    cachekey = (safe, always_safe)
12720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    try:
12730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        (quoter, safe) = _safe_quoters[cachekey]
12740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    except KeyError:
12750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        safe_map = _safe_map.copy()
12760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        safe_map.update([(c, c) for c in safe])
12770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        quoter = safe_map.__getitem__
12780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        safe = always_safe + safe
12790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _safe_quoters[cachekey] = (quoter, safe)
12800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if not s.rstrip(safe):
12810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return s
12820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return ''.join(map(quoter, s))
12830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12840a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef quote_plus(s, safe=''):
12850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Quote the query fragment of a URL; replacing ' ' with '+'"""
12860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if ' ' in s:
12870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        s = quote(s, safe + ' ')
12880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return s.replace(' ', '+')
12890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return quote(s, safe)
12900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12910a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlencode(query, doseq=0):
12920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Encode a sequence of two-element tuples or dictionary into a URL query string.
12930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    If any values in the query arg are sequences and doseq is true, each
12950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    sequence element is converted to a separate parameter.
12960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    If the query arg is a sequence of two-element tuples, the order of the
12980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    parameters in the output will match the order of parameters in the
12990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    input.
13000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
13010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if hasattr(query,"items"):
13030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # mapping objects
13040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        query = query.items()
13050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
13060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # it's a bother at times that strings and string-like objects are
13070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # sequences...
13080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
13090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # non-sequence items should not work with len()
13100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # non-empty strings will fail this
13110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if len(query) and not isinstance(query[0], tuple):
13120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError
13130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # zero-length sequences of all types will get here and succeed,
13140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # but that's a minor nit - since the original implementation
13150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # allowed empty dicts that type of behavior probably should be
13160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # preserved for consistency
13170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except TypeError:
13180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ty,va,tb = sys.exc_info()
13190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise TypeError, "not a valid non-string sequence or mapping object", tb
13200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    l = []
13220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if not doseq:
13230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # preserve old behavior
13240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for k, v in query:
13250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            k = quote_plus(str(k))
13260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            v = quote_plus(str(v))
13270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            l.append(k + '=' + v)
13280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
13290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for k, v in query:
13300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            k = quote_plus(str(k))
13310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if isinstance(v, str):
13320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                v = quote_plus(v)
13330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                l.append(k + '=' + v)
13340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif _is_unicode(v):
13350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # is there a reasonable way to convert to ASCII?
13360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # encode generates a string, but "replace" or "ignore"
13370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # lose information and "strict" can raise UnicodeError
13380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                v = quote_plus(v.encode("ASCII","replace"))
13390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                l.append(k + '=' + v)
13400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
13410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
13420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    # is this a sufficient test for sequence-ness?
13430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    len(v)
13440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except TypeError:
13450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    # not a sequence
13460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    v = quote_plus(str(v))
13470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    l.append(k + '=' + v)
13480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
13490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    # loop over the sequence
13500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    for elt in v:
13510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        l.append(k + '=' + quote_plus(str(elt)))
13520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return '&'.join(l)
13530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Proxy handling
13550a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef getproxies_environment():
13560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Return a dictionary of scheme -> proxy server URL mappings.
13570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Scan the environment for variables named <scheme>_proxy;
13590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    this seems to be the standard convention.  If you need a
13600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    different way, you can pass a proxies dictionary to the
13610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    [Fancy]URLopener constructor.
13620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
13640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    proxies = {}
13650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for name, value in os.environ.items():
13660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = name.lower()
13670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if value and name[-6:] == '_proxy':
13680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxies[name[:-6]] = value
13690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return proxies
13700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13710a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef proxy_bypass_environment(host):
13720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Test if proxies should not be used for a particular host.
13730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    Checks the environment for a variable named no_proxy, which should
13750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    be a list of DNS suffixes separated by commas, or '*' for all hosts.
13760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
13770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
13780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # '*' is special case for always bypass
13790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if no_proxy == '*':
13800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return 1
13810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # strip port off host
13820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    hostonly, port = splitport(host)
13830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # check if the host ends with any of the DNS suffixes
13840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
13850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for name in no_proxy_list:
13860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if name and (hostonly.endswith(name) or host.endswith(name)):
13870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 1
13880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # otherwise, don't bypass
13890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return 0
13900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13920a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoif sys.platform == 'darwin':
13930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    from _scproxy import _get_proxy_settings, _get_proxies
13940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def proxy_bypass_macosx_sysconf(host):
13960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
13970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        Return True iff this host shouldn't be accessed using a proxy
13980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        This function uses the MacOSX framework SystemConfiguration
14000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        to fetch the proxy information.
14010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
14020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import re
14030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import socket
14040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        from fnmatch import fnmatch
14050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        hostonly, port = splitport(host)
14070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def ip2num(ipAddr):
14090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parts = ipAddr.split('.')
14100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parts = map(int, parts)
14110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if len(parts) != 4:
14120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parts = (parts + [0, 0, 0, 0])[:4]
14130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
14140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxy_settings = _get_proxy_settings()
14160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Check for simple host names:
14180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if '.' not in host:
14190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if proxy_settings['exclude_simple']:
14200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return True
14210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        hostIP = None
14230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for value in proxy_settings.get('exceptions', ()):
14250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Items in the list are strings like these: *.local, 169.254/16
14260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not value: continue
14270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
14290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if m is not None:
14300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if hostIP is None:
14310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    try:
14320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        hostIP = socket.gethostbyname(hostonly)
14330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        hostIP = ip2num(hostIP)
14340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    except socket.error:
14350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        continue
14360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                base = ip2num(m.group(1))
14380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                mask = m.group(2)
14390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if mask is None:
14400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    mask = 8 * (m.group(1).count('.') + 1)
14410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
14430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    mask = int(mask[1:])
14440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                mask = 32 - mask
14450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if (hostIP >> mask) == (base >> mask):
14470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return True
14480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif fnmatch(host, value):
14500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return True
14510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return False
14530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getproxies_macosx_sysconf():
14550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
14560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        This function uses the MacOSX framework SystemConfiguration
14580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        to fetch the proxy information.
14590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
14600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return _get_proxies()
14610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def proxy_bypass(host):
14630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if getproxies_environment():
14640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return proxy_bypass_environment(host)
14650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
14660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return proxy_bypass_macosx_sysconf(host)
14670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getproxies():
14690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return getproxies_environment() or getproxies_macosx_sysconf()
14700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14710a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelif os.name == 'nt':
14720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getproxies_registry():
14730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
14740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        Win32 uses the registry to store proxies.
14760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
14780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxies = {}
14790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
14800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import _winreg
14810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
14820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Std module, so should be around - but you never know!
14830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return proxies
14840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
14850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
14860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
14870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxyEnable = _winreg.QueryValueEx(internetSettings,
14880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                               'ProxyEnable')[0]
14890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if proxyEnable:
14900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # Returned as Unicode but problems if not converted to ASCII
14910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                proxyServer = str(_winreg.QueryValueEx(internetSettings,
14920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                                       'ProxyServer')[0])
14930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if '=' in proxyServer:
14940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    # Per-protocol settings
14950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    for p in proxyServer.split(';'):
14960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        protocol, address = p.split('=', 1)
14970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        # See if address has a type:// prefix
14980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        import re
14990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        if not re.match('^([^/:]+)://', address):
15000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            address = '%s://%s' % (protocol, address)
15010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        proxies[protocol] = address
15020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
15030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    # Use one setting for all protocols
15040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if proxyServer[:5] == 'http:':
15050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        proxies['http'] = proxyServer
15060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    else:
15070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        proxies['http'] = 'http://%s' % proxyServer
15080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        proxies['https'] = 'https://%s' % proxyServer
15090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        proxies['ftp'] = 'ftp://%s' % proxyServer
15100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            internetSettings.Close()
15110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except (WindowsError, ValueError, TypeError):
15120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Either registry key not found etc, or the value in an
15130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # unexpected format.
15140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # proxies already set up to be empty so nothing to do
15150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
15160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return proxies
15170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getproxies():
15190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
15200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        Returns settings gathered from the environment, if specified,
15220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        or the registry.
15230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
15250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return getproxies_environment() or getproxies_registry()
15260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def proxy_bypass_registry(host):
15280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import _winreg
15300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import re
15310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
15320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Std modules, so should be around - but you never know!
15330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 0
15340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
15360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
15370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxyEnable = _winreg.QueryValueEx(internetSettings,
15380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                               'ProxyEnable')[0]
15390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
15400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                                     'ProxyOverride')[0])
15410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # ^^^^ Returned as Unicode but problems if not converted to ASCII
15420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except WindowsError:
15430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 0
15440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not proxyEnable or not proxyOverride:
15450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 0
15460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # try to make a host list from name and IP address.
15470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        rawHost, port = splitport(host)
15480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        host = [rawHost]
15490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            addr = socket.gethostbyname(rawHost)
15510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if addr != rawHost:
15520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                host.append(addr)
15530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except socket.error:
15540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
15550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fqdn = socket.getfqdn(rawHost)
15570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if fqdn != rawHost:
15580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                host.append(fqdn)
15590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except socket.error:
15600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
15610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # make a check value list from the registry entry: replace the
15620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # '<local>' string by the localhost entry and the corresponding
15630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # canonical entry.
15640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        proxyOverride = proxyOverride.split(';')
15650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # now check if we match one of the registry values.
15660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for test in proxyOverride:
15670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if test == '<local>':
15680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if '.' not in rawHost:
15690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return 1
15700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            test = test.replace(".", r"\.")     # mask dots
15710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            test = test.replace("*", r".*")     # change glob sequence
15720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            test = test.replace("?", r".")      # change glob char
15730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for val in host:
15740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # print "%s <--> %s" %( test, val )
15750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if re.match(test, val, re.I):
15760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return 1
15770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return 0
15780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def proxy_bypass(host):
15800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a dictionary of scheme -> proxy server URL mappings.
15810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        Returns settings gathered from the environment, if specified,
15830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        or the registry.
15840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
15860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if getproxies_environment():
15870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return proxy_bypass_environment(host)
15880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
15890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return proxy_bypass_registry(host)
15900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15910a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse:
15920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # By default use environment variables
15930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    getproxies = getproxies_environment
15940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    proxy_bypass = proxy_bypass_environment
15950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Test and time quote() and unquote()
15970a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef test1():
15980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    s = ''
15990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for i in range(256): s = s + chr(i)
16000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    s = s*4
16010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    t0 = time.time()
16020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    qs = quote(s)
16030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    uqs = unquote(qs)
16040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    t1 = time.time()
16050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if uqs != s:
16060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        print 'Wrong!'
16070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    print repr(s)
16080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    print repr(qs)
16090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    print repr(uqs)
16100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    print round(t1 - t0, 3), 'sec'
16110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16130a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef reporthook(blocknum, blocksize, totalsize):
16140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Report during remote transfers
16150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    print "Block number: %d, Block size: %d, Total size: %d" % (
16160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        blocknum, blocksize, totalsize)
1617