"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""

import string
import socket
import os
import time
import sys
import base64
import re

from urlparse import urljoin as basejoin

# Names exported by "from urllib import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

# Interpolated into URLopener.version, which becomes the default
# User-Agent header value.
__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper
# for non-unix systems
if os.name not in ('nt', 'riscos'):
    # Generic platforms: a file-scheme URL path and a file system path
    # differ only by percent-quoting.
    def url2pathname(pathname):
        """Turn a relative 'file' scheme URL into a file system path.

        OS-specific conversion; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a file system path into a relative 'file' scheme URL.

        OS-specific conversion; not recommended for general use."""
        return quote(pathname)
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    # Only 'riscos' can reach this branch.
    from rourl2path import url2pathname, pathname2url

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shortcut for basic usage
# Shared opener, created lazily by urlopen()/urlretrieve().
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional request data; passed to the opener's open() only
               when it is not None.
    proxies -- optional proxy mapping.  When given, a fresh FancyURLopener
               is built for this call and the shared module-level opener
               is neither used nor replaced.

    Returns whatever the opener's open() method returns.
    """
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # First call without explicit proxies: create and remember the
        # shared opener.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The shared FancyURLopener is created on first use.  All arguments are
    forwarded unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener (if one exists) and empty the
    module-level _safe_quoters and ftpcache mappings."""
    if _urlopener:
        _urlopener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()

# check for SSL
# NOTE(review): the bare except treats *any* failure while importing ssl
# as "no SSL support", not just ImportError -- confirm this best-effort
# behaviour is intended before narrowing it.
try:
    import ssl
except:
    _have_ssl = False
else:
    _have_ssl = True

# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError raised by URLopener.retrieve() when fewer bytes were read
    than the Content-Length header announced.

    The `content` attribute holds the second constructor argument;
    retrieve() passes its (filename, headers) result there.
    """
    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content

# Module-level ftp cache; every URLopener instance references this same
# dict by default (see URLopener.__init__).
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() can test the attribute even if
    # __init__ did not get as far as assigning the per-instance list.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client-certificate options and default headers.

        proxies -- mapping from URL scheme to proxy URL; when None the
                   result of getproxies() is used.
        x509    -- keyword options; only 'key_file' and 'cert_file' are
                   read here.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        """Finalizer: delegate to close()."""
        self.close()

    def close(self):
        """Release resources held by this opener; same as cleanup()."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files created by retrieve() and empty
        tempcache.  OSError from deleting a file is ignored."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # os.unlink was saved on the instance in __init__ so
                    # that no module global is needed here.
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method named open_<scheme> (with '-' replaced
        by '_'), or to open_unknown()/open_unknown_proxy() when no such
        method exists.  A socket.error raised by the handler is re-raised
        as IOError('socket error', msg) with the original traceback.
        """
        fullurl = unwrap(toBytes(fullurl))
        # percent encode url, fixing lame server errors for e.g, like space
        # within url paths.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            # Serve a previously retrieved copy from the local cache file.
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            # A proxy is configured for this scheme: dispatch on the
            # proxy's own scheme and hand the handler a tuple.
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            raise IOError, ('socket error', msg), sys.exc_info()[2]

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type.

        This default always raises IOError('url error', ...)."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL through a proxy whose
        scheme has no open_<scheme> method.

        This default always raises IOError('url error', ...)."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        filename   -- where to store the data; when omitted a temporary
                      file is created and remembered for cleanup().
        reporthook -- if given, called as reporthook(blocknum, bs, size)
                      once before reading and once after each block;
                      size is -1 when no Content-Length header is present.
        data       -- forwarded to open().

        Raises ContentTooShortError when fewer bytes were read than the
        Content-Length header announced.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            # Local object and no copy requested: try to return its own
            # path; on IOError fall through to the generic path below.
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Reduce the URL to its path component so the temporary
                # file can keep the original file extension.
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    # NOTE(review): the entry is stored before the body
                    # has been copied, so it stays cached even if the
                    # transfer below fails -- confirm this is acceptable.
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url  -- either a string (the part of the URL after 'http:') for a
                direct request, or the (proxyhost, fullurl) tuple built by
                open() when the request goes through a proxy.
        data -- if not None, sent as the body of a POST with content type
                application/x-www-form-urlencoded; otherwise a GET is
                issued.

        Returns an addinfourl object for 2xx responses; any other status
        is handed to http_error().  Raises IOError when no host can be
        determined or the status line is bad.
        """
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: credentials may be embedded in the host part.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Connect straight to the target host for this request.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        # Headers registered on the opener (User-Agent plus addheader()).
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code.

        A specific handler's result is returned only when it is true;
        otherwise http_error_default() is called.
        """
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

3784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def http_error_default(self, url, fp, errcode, errmsg, headers): 3794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Default error handler: close the connection and raise IOError.""" 3804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fp.close() 3814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('http error', errcode, errmsg, headers) 3824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 3834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _have_ssl: 3844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def open_https(self, url, data=None): 3854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Use HTTPS protocol.""" 3864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 3874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import httplib 3884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user_passwd = None 3894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_passwd = None 3904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if isinstance(url, str): 3914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, selector = splithost(url) 3924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if host: 3934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user_passwd, host = splituser(host) 3944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host = unquote(host) 3954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao realhost = host 3964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 3974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, selector = url 3984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # here, we determine, whether the proxy contains authorization information 3994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_passwd, host = splituser(host) 4004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao urltype, rest = splittype(selector) 4014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao url = rest 4024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user_passwd = None 4034adfde8bc82dd39f59e0445588c3e599ada477dJosh 
Gao if urltype.lower() != 'https': 4044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao realhost = None 4054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao realhost, rest = splithost(rest) 4074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if realhost: 4084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user_passwd, realhost = splituser(realhost) 4094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if user_passwd: 4104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao selector = "%s://%s%s" % (urltype, realhost, rest) 4114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao #print "proxy via https:", host, selector 4124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not host: raise IOError, ('https error', 'no host given') 4134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if proxy_passwd: 4144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_passwd = unquote(proxy_passwd) 4154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_auth = base64.b64encode(proxy_passwd).strip() 4164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_auth = None 4184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if user_passwd: 4194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user_passwd = unquote(user_passwd) 4204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao auth = base64.b64encode(user_passwd).strip() 4214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao auth = None 4234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao h = httplib.HTTPS(host, 0, 4244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao key_file=self.key_file, 4254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cert_file=self.cert_file) 4264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if data is not None: 4274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao h.putrequest('POST', selector) 4284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao h.putheader('Content-Type', 
4294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'application/x-www-form-urlencoded') 4304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao h.putheader('Content-Length', '%d' % len(data)) 4314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao h.putrequest('GET', selector) 4334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 4344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if auth: h.putheader('Authorization', 'Basic %s' % auth) 4354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if realhost: h.putheader('Host', realhost) 4364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for args in self.addheaders: h.putheader(*args) 4374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao h.endheaders(data) 4384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao errcode, errmsg, headers = h.getreply() 4394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fp = h.getfile() 4404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if errcode == -1: 4414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if fp: fp.close() 4424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # something went wrong with the HTTP status line 4434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('http protocol error', 0, 4444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'got a bad status line', None) 4454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # According to RFC 2616, "2xx" code indicates that the client's 4464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # request was successfully received, understood, and accepted. 
4474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (200 <= errcode < 300): 4484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return addinfourl(fp, headers, "https:" + url, errcode) 4494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if data is None: 4514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.http_error(url, fp, errcode, errmsg, headers) 4524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.http_error(url, fp, errcode, errmsg, headers, 4544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao data) 4554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def open_file(self, url): 4574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Use local file or FTP depending on form of URL.""" 4584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not isinstance(url, str): 4594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('file error', 'proxy support for file protocol currently not implemented') 4604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 4614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.open_ftp(url) 4624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.open_local_file(url) 4644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def open_local_file(self, url): 4664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Use local file.""" 4674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import mimetypes, mimetools, email.utils 4684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 4694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from cStringIO import StringIO 4704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ImportError: 4714adfde8bc82dd39f59e0445588c3e599ada477dJosh 
Gao from StringIO import StringIO 4724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, file = splithost(url) 4734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao localname = url2pathname(file) 4744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 4754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao stats = os.stat(localname) 4764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except OSError, e: 4774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError(e.errno, e.strerror, e.filename) 4784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao size = stats.st_size 4794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao modified = email.utils.formatdate(stats.st_mtime, usegmt=True) 4804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mtype = mimetypes.guess_type(url)[0] 4814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers = mimetools.Message(StringIO( 4824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 4834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (mtype or 'text/plain', size, modified))) 4844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not host: 4854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao urlfile = file 4864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if file[:1] == '/': 4874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao urlfile = 'file://' + file 4884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif file[:2] == './': 4894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise ValueError("local file url may start with / or file:. 
Unknown url of type: %s" % url) 4904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return addinfourl(open(localname, 'rb'), 4914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers, urlfile) 4924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, port = splitport(host) 4934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not port \ 4944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao and socket.gethostbyname(host) in (localhost(), thishost()): 4954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao urlfile = file 4964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if file[:1] == '/': 4974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao urlfile = 'file://' + file 4984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return addinfourl(open(localname, 'rb'), 4994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers, urlfile) 5004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('local file error', 'not on local host') 5014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def open_ftp(self, url): 5034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Use FTP protocol.""" 5044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not isinstance(url, str): 5054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') 5064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import mimetypes, mimetools 5074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 5084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from cStringIO import StringIO 5094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ImportError: 5104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from StringIO import StringIO 5114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, path = splithost(url) 5124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not host: raise IOError, ('ftp error', 'no host given') 5134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, port = splitport(host) 
5144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user, host = splituser(host) 5154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if user: user, passwd = splitpasswd(user) 5164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: passwd = None 5174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host = unquote(host) 5184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao user = user or '' 5194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao passwd = passwd or '' 5204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host = socket.gethostbyname(host) 5214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not port: 5224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import ftplib 5234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port = ftplib.FTP_PORT 5244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 5254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port = int(port) 5264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path, attrs = splitattr(path) 5274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = unquote(path) 5284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao dirs = path.split('/') 5294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao dirs, file = dirs[:-1], dirs[-1] 5304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if dirs and not dirs[0]: dirs = dirs[1:] 5314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if dirs and not dirs[0]: dirs[0] = '/' 5324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao key = user, host, port, '/'.join(dirs) 5334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # XXX thread unsafe! 
5344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if len(self.ftpcache) > MAXFTPCACHE: 5354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Prune the cache, rather arbitrarily 5364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for k in self.ftpcache.keys(): 5374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k != key: 5384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = self.ftpcache[k] 5394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao del self.ftpcache[k] 5404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v.close() 5414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 5424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not key in self.ftpcache: 5434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftpcache[key] = \ 5444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ftpwrapper(user, passwd, host, port, dirs) 5454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not file: type = 'D' 5464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: type = 'I' 5474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for attr in attrs: 5484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attr, value = splitvalue(attr) 5494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if attr.lower() == 'type' and \ 5504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao value in ('a', 'A', 'i', 'I', 'd', 'D'): 5514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao type = value.upper() 5524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (fp, retrlen) = self.ftpcache[key].retrfile(file, type) 5534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mtype = mimetypes.guess_type("ftp:" + url)[0] 5544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers = "" 5554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if mtype: 5564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers += "Content-Type: %s\n" % mtype 5574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if retrlen is not None and retrlen >= 0: 5584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers += "Content-Length: %d\n" % retrlen 
5594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers = mimetools.Message(StringIO(headers)) 5604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return addinfourl(fp, headers, "ftp:" + url) 5614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ftperrors(), msg: 5624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('ftp error', msg), sys.exc_info()[2] 5634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def open_data(self, url, data=None): 5654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Use "data" URL.""" 5664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not isinstance(url, str): 5674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('data error', 'proxy support for data protocol currently not implemented') 5684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # ignore POSTed data 5694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # 5704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # syntax of data URLs: 5714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 5724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # mediatype := [ type "/" subtype ] *( ";" parameter ) 5734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # data := *urlchar 5744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # parameter := attribute "=" value 5754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import mimetools 5764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 5774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from cStringIO import StringIO 5784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ImportError: 5794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from StringIO import StringIO 5804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 5814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao [type, data] = url.split(',', 1) 5824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ValueError: 5834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 
raise IOError, ('data error', 'bad data URL') 5844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not type: 5854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao type = 'text/plain;charset=US-ASCII' 5864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao semi = type.rfind(';') 5874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if semi >= 0 and '=' not in type[semi:]: 5884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao encoding = type[semi+1:] 5894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao type = type[:semi] 5904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 5914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao encoding = '' 5924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg = [] 5934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 5944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao time.gmtime(time.time()))) 5954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg.append('Content-type: %s' % type) 5964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if encoding == 'base64': 5974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao data = base64.decodestring(data) 5984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 5994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao data = unquote(data) 6004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg.append('Content-Length: %d' % len(data)) 6014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg.append('') 6024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg.append(data) 6034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao msg = '\n'.join(msg) 6044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao f = StringIO(msg) 6054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers = mimetools.Message(f, 0) 6064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao #f.fileno = None # needed for addinfourl 6074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return addinfourl(f, headers, url) 6084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 6094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 

class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301/302/303/307) and Basic authentication
    retries (401/407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the base opener plus redirect/auth bookkeeping."""
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" -> (user, passwd); see get_user_passwd().
        self.auth_cache = {}
        # Number of redirects followed for the request in progress.
        self.tries = 0
        # Redirect limit; a false value disables the check.
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception.

        Returns the error response wrapped in an addinfourl object.
        """
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect through redirect_internal().  Once
        self.maxtries redirects have been followed, the response is
        handed to http_error_500 (or http_error_default when no such
        handler exists) as a 500 "Redirect Recursion" error instead.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                self.tries = 0
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset even when the redirect raises; otherwise a failed
            # redirect would leave the counter raised and later requests
            # on this opener would hit the recursion limit early.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Uses the 'location' header, falling back to 'uri'; returns None
        when neither is present.  Raises IOError when the target is not
        an http, https or ftp URL.  ``data`` is not forwarded.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not newurl_lower.startswith(('http://', 'https://', 'ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.

        This function supports Basic authentication only.  Any other
        challenge is passed to URLopener.http_error_default; otherwise
        the request is retried via retry_<type>_basic_auth().
        """
        # NOTE(review): the URLopener.http_error_default calls below are
        # expected to raise, which is what stops execution at each check.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.

        This function supports Basic authentication only.  Any other
        challenge is passed to URLopener.http_error_default; otherwise
        the request is retried via retry_proxy_<type>_basic_auth().
        """
        # NOTE(review): the URLopener.http_error_default calls below are
        # expected to raise, which is what stops execution at each check.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Rewrites self.proxies['http'] to embed ``user:passwd@`` and
        reopens the URL.  Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # it is passed as clear_cache so a cached entry is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Rewrites self.proxies['https'] to embed ``user:passwd@`` and
        reopens the URL.  Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # it is passed as clear_cache so a cached entry is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with ``user:passwd@`` embedded in the host.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; it is
        # passed as clear_cache so a cached entry is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with ``user:passwd@`` embedded in the host.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; it is
        # passed as clear_cache so a cached entry is discarded.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for ``realm`` at ``host``.

        A cached pair is returned unless ``clear_cache`` is true, in
        which case the entry is deleted and the user is prompted again.
        A freshly prompted pair is cached when either element is
        non-empty.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal and returns (user, passwd), or
        (None, None) when interrupted with Ctrl-C.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once and the result is cached.
    """
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
8184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef thishost(): 8194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return the IP address of the current host.""" 8204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _thishost 8214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _thishost is None: 8224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _thishost = socket.gethostbyname(socket.gethostname()) 8234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return _thishost 8244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_ftperrors = None 8264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef ftperrors(): 8274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return the set of errors raised by the FTP class.""" 8284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _ftperrors 8294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _ftperrors is None: 8304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import ftplib 8314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _ftperrors = ftplib.all_errors 8324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return _ftperrors 8334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_noheaders = None 8354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef noheaders(): 8364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return an empty mimetools.Message object.""" 8374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _noheaders 8384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _noheaders is None: 8394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import mimetools 8404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 8414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from cStringIO import StringIO 8424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ImportError: 8434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from StringIO import StringIO 8444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _noheaders = 
mimetools.Message(StringIO(), 0) 8454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _noheaders.fp.close() # Recycle file descriptor 8464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return _noheaders 8474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Utility classes 8504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass ftpwrapper: 8524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Class used by open_ftp() for cache of open FTP connections.""" 8534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, user, passwd, host, port, dirs, 8554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 8564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao persistent=True): 8574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.user = user 8584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.passwd = passwd 8594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.host = host 8604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.port = port 8614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.dirs = dirs 8624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.timeout = timeout 8634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.refcount = 0 8644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.keepalive = persistent 8654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.init() 8664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def init(self): 8684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import ftplib 8694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.busy = 0 8704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp = ftplib.FTP() 8714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.connect(self.host, self.port, self.timeout) 
8724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.login(self.user, self.passwd) 8734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for dir in self.dirs: 8744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.cwd(dir) 8754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def retrfile(self, file, type): 8774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import ftplib 8784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.endtransfer() 8794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 8804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: cmd = 'TYPE ' + type; isdir = 0 8814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 8824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.voidcmd(cmd) 8834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ftplib.all_errors: 8844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.init() 8854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.voidcmd(cmd) 8864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao conn = None 8874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if file and not isdir: 8884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Try to retrieve as a file 8894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 8904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cmd = 'RETR ' + file 8914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao conn, retrlen = self.ftp.ntransfercmd(cmd) 8924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ftplib.error_perm, reason: 8934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if str(reason)[:3] != '550': 8944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('ftp error', reason), sys.exc_info()[2] 8954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not conn: 8964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Set transfer mode to ASCII! 
8974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.voidcmd('TYPE A') 8984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Try a directory listing. Verify that directory exists. 8994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if file: 9004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pwd = self.ftp.pwd() 9014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 9024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 9034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.cwd(file) 9044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ftplib.error_perm, reason: 9054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise IOError, ('ftp error', reason), sys.exc_info()[2] 9064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 9074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.cwd(pwd) 9084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cmd = 'LIST ' + file 9094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 9104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cmd = 'LIST' 9114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao conn, retrlen = self.ftp.ntransfercmd(cmd) 9124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.busy = 1 9134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ftpobj = addclosehook(conn.makefile('rb'), self.file_close) 9144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.refcount += 1 9154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao conn.close() 9164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Pass back both a suitably decorated object and a retrieval length 9174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return (ftpobj, retrlen) 9184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def endtransfer(self): 9204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not self.busy: 9214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 9224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.busy = 0 9234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 
9244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.voidresp() 9254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ftperrors(): 9264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 9274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def close(self): 9294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.keepalive = False 9304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.refcount <= 0: 9314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.real_close() 9324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def file_close(self): 9344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.endtransfer() 9354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.refcount -= 1 9364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.refcount <= 0 and not self.keepalive: 9374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.real_close() 9384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def real_close(self): 9404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.endtransfer() 9414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 9424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.ftp.close() 9434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ftperrors(): 9444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 9454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addbase: 9474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Base class for addinfo and addclosehook.""" 9484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, fp): 9504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.fp = fp 9514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.read = self.fp.read 9524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.readline = self.fp.readline 
9534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines 9544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if hasattr(self.fp, "fileno"): 9554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.fileno = self.fp.fileno 9564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 9574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.fileno = lambda: None 9584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if hasattr(self.fp, "__iter__"): 9594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.__iter__ = self.fp.__iter__ 9604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if hasattr(self.fp, "next"): 9614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.next = self.fp.next 9624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __repr__(self): 9644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return '<%s at %r whose fp = %r>' % (self.__class__.__name__, 9654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao id(self), self.fp) 9664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def close(self): 9684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.read = None 9694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.readline = None 9704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.readlines = None 9714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.fileno = None 9724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.fp: self.fp.close() 9734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.fp = None 9744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addclosehook(addbase): 9764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Class to add a close hook to an open file.""" 9774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, fp, closehook, *hookargs): 
9794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao addbase.__init__(self, fp) 9804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.closehook = closehook 9814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.hookargs = hookargs 9824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def close(self): 9844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.closehook: 9854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.closehook(*self.hookargs) 9864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.closehook = None 9874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.hookargs = None 9884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao addbase.close(self) 9894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addinfo(addbase): 9914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """class to add an info() method to an open file.""" 9924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, fp, headers): 9944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao addbase.__init__(self, fp) 9954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.headers = headers 9964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def info(self): 9984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.headers 9994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass addinfourl(addbase): 10014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """class to add info() and geturl() methods to an open file.""" 10024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, fp, headers, url, code=None): 10044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao addbase.__init__(self, fp) 10054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.headers = headers 
10064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.url = url 10074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.code = code 10084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def info(self): 10104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.headers 10114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def getcode(self): 10134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.code 10144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def geturl(self): 10164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self.url 10174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Utilities to parse URLs (most of these return None for missing parts): 10204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# unwrap('<URL:type://host/path>') --> 'type://host/path' 10214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splittype('type:opaquestring') --> 'type', 'opaquestring' 10224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splithost('//host[:port]/path') --> 'host[:port]', '/path' 10234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' 10244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitpasswd('user:passwd') -> 'user', 'passwd' 10254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitport('host:port') --> 'host', 'port' 10264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitquery('/path?query') --> '/path', 'query' 10274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splittag('/path#tag') --> '/path', 'tag' 10284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitattr('/path;attr1=value1;attr2=value2;...') -> 10294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# '/path', ['attr1=value1', 'attr2=value2', ...] 
10304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splitvalue('attr=value') --> 'attr', 'value' 10314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# unquote('abc%20def') -> 'abc def' 10324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# quote('abc def') -> 'abc%20def') 10334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaotry: 10354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao unicode 10364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoexcept NameError: 10374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _is_unicode(x): 10384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 0 10394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelse: 10404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _is_unicode(x): 10414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return isinstance(x, unicode) 10424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef toBytes(url): 10444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """toBytes(u"URL") --> 'URL'.""" 10454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Most URL schemes require ASCII. 
If that changes, the conversion 10464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # can be relaxed 10474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _is_unicode(url): 10484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 10494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao url = url.encode("ASCII") 10504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except UnicodeError: 10514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise UnicodeError("URL " + repr(url) + 10524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao " contains non-ASCII characters") 10534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return url 10544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef unwrap(url): 10564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 10574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao url = url.strip() 10584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if url[:1] == '<' and url[-1:] == '>': 10594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao url = url[1:-1].strip() 10604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if url[:4] == 'URL:': url = url[4:].strip() 10614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return url 10624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_typeprog = None 10644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splittype(url): 10654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 10664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _typeprog 10674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _typeprog is None: 10684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 10694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _typeprog = re.compile('^([^/:]+):') 10704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _typeprog.match(url) 
10724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: 10734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao scheme = match.group(1) 10744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return scheme.lower(), url[len(scheme) + 1:] 10754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return None, url 10764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_hostprog = None 10784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splithost(url): 10794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 10804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _hostprog 10814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _hostprog is None: 10824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 10834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _hostprog = re.compile('^//([^/?]*)(.*)$') 10844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _hostprog.match(url) 10864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: 10874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host_port = match.group(1) 10884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = match.group(2) 10894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if path and not path.startswith('/'): 10904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = '/' + path 10914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return host_port, path 10924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return None, url 10934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_userprog = None 10954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splituser(host): 10964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 10974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _userprog 
10984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _userprog is None: 10994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 11004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _userprog = re.compile('^(.*)@(.*)$') 11014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _userprog.match(host) 11034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: return match.group(1, 2) 11044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return None, host 11054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_passwdprog = None 11074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitpasswd(user): 11084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 11094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _passwdprog 11104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _passwdprog is None: 11114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 11124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _passwdprog = re.compile('^([^:]*):(.*)$',re.S) 11134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _passwdprog.match(user) 11154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: return match.group(1, 2) 11164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return user, None 11174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# splittag('/path#tag') --> '/path', 'tag' 11194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_portprog = None 11204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitport(host): 11214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splitport('host:port') --> 'host', 'port'.""" 11224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _portprog 11234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _portprog is None: 11244adfde8bc82dd39f59e0445588c3e599ada477dJosh 
Gao import re 11254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _portprog = re.compile('^(.*):([0-9]+)$') 11264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _portprog.match(host) 11284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: return match.group(1, 2) 11294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return host, None 11304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_nportprog = None 11324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitnport(host, defport=-1): 11334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Split host and port, returning numeric port. 11344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Return given default port if no ':' found; defaults to -1. 11354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Return numerical port if a valid number are found after ':'. 11364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Return None if ':' but not a valid number.""" 11374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _nportprog 11384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _nportprog is None: 11394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 11404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _nportprog = re.compile('^(.*):(.*)$') 11414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _nportprog.match(host) 11434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: 11444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host, port = match.group(1, 2) 11454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 11464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not port: raise ValueError, "no digits" 11474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao nport = int(port) 11484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ValueError: 11494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao nport = None 
11504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return host, nport 11514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return host, defport 11524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_queryprog = None 11544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splitquery(url): 11554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splitquery('/path?query') --> '/path', 'query'.""" 11564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _queryprog 11574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _queryprog is None: 11584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 11594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _queryprog = re.compile('^(.*)\?([^?]*)$') 11604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _queryprog.match(url) 11624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: return match.group(1, 2) 11634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return url, None 11644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao_tagprog = None 11664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef splittag(url): 11674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """splittag('/path#tag') --> '/path', 'tag'.""" 11684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao global _tagprog 11694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if _tagprog is None: 11704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 11714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _tagprog = re.compile('^(.*)#([^#]*)$') 11724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao match = _tagprog.match(url) 11744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if match: return match.group(1, 2) 11754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return url, None 11764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    # Everything before the first ';' is the path; each later piece is
    # one attribute string.
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits at the first '='.  When the pattern does not match (for
    example when there is no '='), the input is returned unchanged
    together with None.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled lazily on first use and cached at module level.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()

# urlparse contains a duplicate of the unquote() function below to avoid a
# circular import.  If you update it, also update the copy in urlparse.
# This code duplication does not exist in Python3.

# Lookup table from every two-character hex string (upper, lower and mixed
# case) to the character it encodes.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)
# Captures runs of ASCII characters; used to split unicode input.
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left untouched.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Because the pattern has a capturing group, the split result
        # alternates: even indexes hold non-ASCII text, odd indexes hold the
        # ASCII runs, which are the only parts that can contain escapes.
        pieces = _asciire.split(s)
        out = [pieces[0]]
        for idx in range(1, len(pieces), 2):
            out.append(unquote(str(pieces[idx])).decode('latin1'))
            out.append(pieces[idx + 1])
        return ''.join(out)

    chunks = s.split('%')
    if len(chunks) == 1:
        # fastpath: nothing to decode
        return s
    out = [chunks[0]]
    for chunk in chunks[1:]:
        decoded = _hextochr.get(chunk[:2])
        if decoded is None:
            # Not a valid escape: put the '%' back and keep the text as is.
            out.append('%')
            out.append(chunk)
        else:
            out.append(decoded)
            out.append(chunk[2:])
    return ''.join(out)

def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # '+' must become a space before the escapes are decoded, so that an
    # escaped plus sign ('%2B') survives as a literal '+'.
    return unquote(s.replace('+', ' '))

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Maps each of the 256 single-byte characters either to itself (when it is in
# always_safe) or to its '%XX' escape.
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache of (quoter function, full safe-character string), keyed by
# (safe, always_safe).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters listed in 'safe' are left unquoted in addition to the
    always_safe set.  Raises TypeError when s is None; any other empty
    value is returned unchanged.
    """
    if s is None:
        raise TypeError('None object cannot be quoted')
    if not s:
        # fastpath: empty input
        return s
    key = (safe, always_safe)
    cached = _safe_quoters.get(key)
    if cached is None:
        table = _safe_map.copy()
        table.update([(ch, ch) for ch in safe])
        cached = (table.__getitem__, always_safe + safe)
        _safe_quoters[key] = cached
    quoter, unquoted = cached
    if not s.rstrip(unquoted):
        # Every character is safe, so there is nothing to escape.
        return s
    return ''.join(map(quoter, s))

def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces out of the escaping pass, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')

Gaodef urlencode(query, doseq=0): 12924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Encode a sequence of two-element tuples or dictionary into a URL query string. 12934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao If any values in the query arg are sequences and doseq is true, each 12954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao sequence element is converted to a separate parameter. 12964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao If the query arg is a sequence of two-element tuples, the order of the 12984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parameters in the output will match the order of parameters in the 12994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao input. 13004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 13014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if hasattr(query,"items"): 13034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # mapping objects 13044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao query = query.items() 13054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 13064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # it's a bother at times that strings and string-like objects are 13074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # sequences... 
13084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 13094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # non-sequence items should not work with len() 13104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # non-empty strings will fail this 13114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if len(query) and not isinstance(query[0], tuple): 13124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise TypeError 13134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # zero-length sequences of all types will get here and succeed, 13144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # but that's a minor nit - since the original implementation 13154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # allowed empty dicts that type of behavior probably should be 13164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # preserved for consistency 13174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except TypeError: 13184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ty,va,tb = sys.exc_info() 13194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise TypeError, "not a valid non-string sequence or mapping object", tb 13204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao l = [] 13224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not doseq: 13234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # preserve old behavior 13244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for k, v in query: 13254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = quote_plus(str(k)) 13264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = quote_plus(str(v)) 13274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao l.append(k + '=' + v) 13284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 13294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for k, v in query: 13304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = quote_plus(str(k)) 13314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if isinstance(v, str): 13324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = 
quote_plus(v) 13334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao l.append(k + '=' + v) 13344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif _is_unicode(v): 13354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # is there a reasonable way to convert to ASCII? 13364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # encode generates a string, but "replace" or "ignore" 13374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # lose information and "strict" can raise UnicodeError 13384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = quote_plus(v.encode("ASCII","replace")) 13394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao l.append(k + '=' + v) 13404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 13414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 13424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # is this a sufficient test for sequence-ness? 13434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao len(v) 13444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except TypeError: 13454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # not a sequence 13464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = quote_plus(str(v)) 13474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao l.append(k + '=' + v) 13484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 13494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # loop over the sequence 13504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for elt in v: 13514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao l.append(k + '=' + quote_plus(str(elt))) 13524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return '&'.join(l) 13534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Proxy handling 13554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef getproxies_environment(): 13564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 
13574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Scan the environment for variables named <scheme>_proxy; 13594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao this seems to be the standard convention. If you need a 13604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao different way, you can pass a proxies dictionary to the 13614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao [Fancy]URLopener constructor. 13624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 13644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies = {} 13654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for name, value in os.environ.items(): 13664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao name = name.lower() 13674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if value and name[-6:] == '_proxy': 13684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies[name[:-6]] = value 13694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxies 13704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef proxy_bypass_environment(host): 13724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Test if proxies should not be used for a particular host. 13734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Checks the environment for a variable named no_proxy, which should 13754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao be a list of DNS suffixes separated by commas, or '*' for all hosts. 
13764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 13774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 13784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # '*' is special case for always bypass 13794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if no_proxy == '*': 13804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 1 13814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # strip port off host 13824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao hostonly, port = splitport(host) 13834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # check if the host ends with any of the DNS suffixes 13844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] 13854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for name in no_proxy_list: 13864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if name and (hostonly.endswith(name) or host.endswith(name)): 13874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 1 13884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # otherwise, don't bypass 13894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 0 13904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoif sys.platform == 'darwin': 13934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from _scproxy import _get_proxy_settings, _get_proxies 13944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def proxy_bypass_macosx_sysconf(host): 13964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 13974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Return True iff this host shouldn't be accessed using a proxy 13984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao This function uses the MacOSX framework SystemConfiguration 14004adfde8bc82dd39f59e0445588c3e599ada477dJosh 
Gao to fetch the proxy information. 14014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 14024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 14034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import socket 14044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao from fnmatch import fnmatch 14054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao hostonly, port = splitport(host) 14074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def ip2num(ipAddr): 14094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parts = ipAddr.split('.') 14104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parts = map(int, parts) 14114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if len(parts) != 4: 14124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parts = (parts + [0, 0, 0, 0])[:4] 14134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 14144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_settings = _get_proxy_settings() 14164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Check for simple host names: 14184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if '.' 
not in host: 14194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if proxy_settings['exclude_simple']: 14204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 14214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao hostIP = None 14234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for value in proxy_settings.get('exceptions', ()): 14254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Items in the list are strings like these: *.local, 169.254/16 14264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not value: continue 14274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 14294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if m is not None: 14304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if hostIP is None: 14314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 14324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao hostIP = socket.gethostbyname(hostonly) 14334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao hostIP = ip2num(hostIP) 14344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except socket.error: 14354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 14364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao base = ip2num(m.group(1)) 14384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mask = m.group(2) 14394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if mask is None: 14404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mask = 8 * (m.group(1).count('.') + 1) 14414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 14434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mask = int(mask[1:]) 14444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mask = 32 - mask 14454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if 
(hostIP >> mask) == (base >> mask): 14474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 14484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif fnmatch(host, value): 14504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 14514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 14534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def getproxies_macosx_sysconf(): 14554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 14564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao This function uses the MacOSX framework SystemConfiguration 14584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao to fetch the proxy information. 14594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 14604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return _get_proxies() 14614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def proxy_bypass(host): 14634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if getproxies_environment(): 14644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxy_bypass_environment(host) 14654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 14664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxy_bypass_macosx_sysconf(host) 14674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def getproxies(): 14694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return getproxies_environment() or getproxies_macosx_sysconf() 14704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelif os.name == 'nt': 14724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def getproxies_registry(): 14734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a 
dictionary of scheme -> proxy server URL mappings. 14744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Win32 uses the registry to store proxies. 14764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 14784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies = {} 14794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 14804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import _winreg 14814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ImportError: 14824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Std module, so should be around - but you never know! 14834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxies 14844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 14854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 14864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 14874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxyEnable = _winreg.QueryValueEx(internetSettings, 14884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'ProxyEnable')[0] 14894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if proxyEnable: 14904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Returned as Unicode but problems if not converted to ASCII 14914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxyServer = str(_winreg.QueryValueEx(internetSettings, 14924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'ProxyServer')[0]) 14934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if '=' in proxyServer: 14944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Per-protocol settings 14954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for p in proxyServer.split(';'): 14964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao protocol, address = p.split('=', 1) 14974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # See if address has a type:// prefix 
14984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 14994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not re.match('^([^/:]+)://', address): 15004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao address = '%s://%s' % (protocol, address) 15014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies[protocol] = address 15024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 15034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Use one setting for all protocols 15044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if proxyServer[:5] == 'http:': 15054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies['http'] = proxyServer 15064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 15074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies['http'] = 'http://%s' % proxyServer 15084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies['https'] = 'https://%s' % proxyServer 15094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxies['ftp'] = 'ftp://%s' % proxyServer 15104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao internetSettings.Close() 15114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except (WindowsError, ValueError, TypeError): 15124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Either registry key not found etc, or the value in an 15134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # unexpected format. 15144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # proxies already set up to be empty so nothing to do 15154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 15164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxies 15174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def getproxies(): 15194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 
15204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Returns settings gathered from the environment, if specified, 15224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao or the registry. 15234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 15254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return getproxies_environment() or getproxies_registry() 15264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def proxy_bypass_registry(host): 15284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import _winreg 15304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao import re 15314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ImportError: 15324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Std modules, so should be around - but you never know! 15334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 0 15344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 15364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 15374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxyEnable = _winreg.QueryValueEx(internetSettings, 15384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'ProxyEnable')[0] 15394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxyOverride = str(_winreg.QueryValueEx(internetSettings, 15404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'ProxyOverride')[0]) 15414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # ^^^^ Returned as Unicode but problems if not converted to ASCII 15424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except WindowsError: 15434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 0 15444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not proxyEnable or not 
proxyOverride: 15454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 0 15464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # try to make a host list from name and IP address. 15474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rawHost, port = splitport(host) 15484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host = [rawHost] 15494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao addr = socket.gethostbyname(rawHost) 15514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if addr != rawHost: 15524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host.append(addr) 15534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except socket.error: 15544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 15554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fqdn = socket.getfqdn(rawHost) 15574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if fqdn != rawHost: 15584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host.append(fqdn) 15594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except socket.error: 15604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 15614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # make a check value list from the registry entry: replace the 15624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # '<local>' string by the localhost entry and the corresponding 15634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # canonical entry. 15644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxyOverride = proxyOverride.split(';') 15654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # now check if we match one of the registry values. 15664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for test in proxyOverride: 15674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if test == '<local>': 15684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if '.' 
not in rawHost: 15694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 1 15704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao test = test.replace(".", r"\.") # mask dots 15714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao test = test.replace("*", r".*") # change glob sequence 15724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao test = test.replace("?", r".") # change glob char 15734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for val in host: 15744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # print "%s <--> %s" %( test, val ) 15754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if re.match(test, val, re.I): 15764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 1 15774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return 0 15784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def proxy_bypass(host): 15804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 15814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Returns settings gathered from the environment, if specified, 15834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao or the registry. 
15844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 15864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if getproxies_environment(): 15874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxy_bypass_environment(host) 15884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 15894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return proxy_bypass_registry(host) 15904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoelse: 15924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # By default use environment variables 15934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao getproxies = getproxies_environment 15944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao proxy_bypass = proxy_bypass_environment 15954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Test and time quote() and unquote() 15974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef test1(): 15984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao s = '' 15994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for i in range(256): s = s + chr(i) 16004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao s = s*4 16014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao t0 = time.time() 16024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao qs = quote(s) 16034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao uqs = unquote(qs) 16044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao t1 = time.time() 16054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if uqs != s: 16064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao print 'Wrong!' 
16074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao print repr(s) 16084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao print repr(qs) 16094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao print repr(uqs) 16104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao print round(t1 - t0, 3), 'sec' 16114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef reporthook(blocknum, blocksize, totalsize): 16144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Report during remote transfers 16154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao print "Block number: %d, Block size: %d, Total size: %d" % ( 16164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao blocknum, blocksize, totalsize) 1617