"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info(). The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""

import string
import socket
import os
import time
import sys
import base64
import re

from urlparse import urljoin as basejoin

__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Fallback for every other platform: the conversion is plain
    # percent-decoding / percent-encoding via unquote() and quote().
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Shortcut for basic usage 710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_urlopener = None 720a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlopen(url, data=None, proxies=None): 730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Create a file-like object for the specified URL to read from.""" 740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from warnings import warnpy3k 750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " 760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "favor of urllib2.urlopen()", stacklevel=2) 770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _urlopener 790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxies is not None: 800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao opener = FancyURLopener(proxies=proxies) 810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif not _urlopener: 820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao opener = FancyURLopener() 830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _urlopener = opener 840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao opener = _urlopener 860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is None: 870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return opener.open(url) 880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return opener.open(url, data) 900a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlretrieve(url, filename=None, reporthook=None, data=None): 910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _urlopener 920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not _urlopener: 930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _urlopener = FancyURLopener() 
940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _urlopener.retrieve(url, filename, reporthook, data) 950a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef urlcleanup(): 960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _urlopener: 970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _urlopener.cleanup() 980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _safe_quoters.clear() 990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ftpcache.clear() 1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# check for SSL 1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry: 1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import ssl 1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept: 1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _have_ssl = False 1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse: 1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _have_ssl = True 1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# exception raised when downloaded size does not match content-length 1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ContentTooShortError(IOError): 1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, message, content): 1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao IOError.__init__(self, message) 1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.content = content 1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoftpcache = {} 1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass URLopener: 1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class to open URLs. 1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao This is a class rather than just a subroutine because we may need 1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao more than one set of global protocol-specific options. 
1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Note -- this is a base class for those who don't want the 1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao automatic handling of errors type 302 (relocated) and 401 1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (authorization needed).""" 1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao __tempfiles = None 1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao version = "Python-urllib/%s" % __version__ 1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Constructor 1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, proxies=None, **x509): 1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxies is None: 1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies = getproxies() 1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao assert hasattr(proxies, 'has_key'), "proxies must be a mapping" 1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.proxies = proxies 1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key_file = x509.get('key_file') 1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.cert_file = x509.get('cert_file') 1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.addheaders = [('User-Agent', self.version)] 1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.__tempfiles = [] 1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.__unlink = os.unlink # See cleanup() 1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tempcache = None 1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Undocumented feature: if you assign {} to tempcache, 1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # it is used to cache files retrieved with 1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # self.retrieve(). 
This is not enabled by default 1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # since it does not work for changing documents (and I 1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # haven't got the logic to check expiration headers 1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # yet). 1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftpcache = ftpcache 1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Undocumented feature: you can use a different 1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ftp cache by assigning to the .ftpcache member; 1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # in case you want logically independent URL openers 1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # XXX This is not threadsafe. Bah. 1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __del__(self): 1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.close() 1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.cleanup() 1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def cleanup(self): 1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This code sometimes runs when the rest of this module 1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # has already been deleted, so it can't use any globals 1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # or import anything. 
1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.__tempfiles: 1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for file in self.__tempfiles: 1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.__unlink(file) 1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except OSError: 1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.__tempfiles[:] 1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.tempcache: 1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tempcache.clear() 1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def addheader(self, *args): 1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Add a header to be used by the HTTP interface only 1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao e.g. u.addheader('Accept', 'sound/basic')""" 1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.addheaders.append(args) 1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # External interface 1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open(self, fullurl, data=None): 1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use URLopener().open(file) instead of open(file, 'r').""" 1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fullurl = unwrap(toBytes(fullurl)) 1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # percent encode url, fixing lame server errors for e.g, like space 1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # within url paths. 
1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") 1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.tempcache and fullurl in self.tempcache: 1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filename, headers = self.tempcache[fullurl] 1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = open(filename, 'rb') 1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(fp, headers, fullurl) 1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urltype, url = splittype(fullurl) 1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not urltype: 1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urltype = 'file' 1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if urltype in self.proxies: 1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy = self.proxies[urltype] 1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urltype, proxyhost = splittype(proxy) 1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, selector = splithost(proxyhost) 1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = (host, fullurl) # Signal special case to open_*() 1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy = None 1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = 'open_' + urltype 1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.type = urltype 2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = name.replace('-', '_') 2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not hasattr(self, name): 2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy: 2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.open_unknown_proxy(proxy, fullurl, data) 2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.open_unknown(fullurl, data) 2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is None: 
2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return getattr(self, name)(url) 2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return getattr(self, name)(url, data) 2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except socket.error, msg: 2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('socket error', msg), sys.exc_info()[2] 2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_unknown(self, fullurl, data=None): 2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Overridable interface to open unknown URL type.""" 2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type, url = splittype(fullurl) 2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('url error', 'unknown url type', type) 2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_unknown_proxy(self, proxy, fullurl, data=None): 2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Overridable interface to open unknown URL type.""" 2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type, url = splittype(fullurl) 2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) 2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # External interface 2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def retrieve(self, url, filename=None, reporthook=None, data=None): 2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """retrieve(url) returns (filename, headers) for a local object 2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao or (tempfilename, headers) for a remote object.""" 2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = unwrap(toBytes(url)) 2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.tempcache and url in self.tempcache: 2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
return self.tempcache[url] 2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type, url1 = splittype(url) 2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if filename is None and (not type or type == 'file'): 2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = self.open_local_file(url1) 2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao hdrs = fp.info() 2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return url2pathname(splithost(url1)[1]), hdrs 2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = self.open(url, data) 2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers = fp.info() 2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if filename: 2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tfp = open(filename, 'wb') 2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import tempfile 2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao garbage, path = splittype(url) 2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao garbage, path = splithost(path or "") 2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path, garbage = splitquery(path or "") 2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path, garbage = splitattr(path or "") 2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao suffix = os.path.splitext(path)[1] 2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (fd, filename) = tempfile.mkstemp(suffix) 2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.__tempfiles.append(filename) 2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tfp = os.fdopen(fd, 'wb') 2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = filename, headers 
2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.tempcache is not None: 2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tempcache[url] = result 2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao bs = 1024*8 2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao size = -1 2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao read = 0 2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocknum = 0 2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if "content-length" in headers: 2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao size = int(headers["Content-Length"]) 2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if reporthook: 2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao reporthook(blocknum, bs, size) 2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while 1: 2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao block = fp.read(bs) 2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if block == "": 2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao read += len(block) 2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tfp.write(block) 2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocknum += 1 2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if reporthook: 2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao reporthook(blocknum, bs, size) 2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tfp.close() 2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # raise exception if actual size does not match content-length header 2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if size >= 0 and read < size: 2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ContentTooShortError("retrieval incomplete: got only %i out " 2840a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao "of %i bytes" % (read, size), result) 2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return result 2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Each method named open_<type> knows how to open that type of URL 2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_http(self, url, data=None): 2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use HTTP protocol.""" 2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import httplib 2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd = None 2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_passwd= None 2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(url, str): 2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, selector = splithost(url) 2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if host: 2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd, host = splituser(host) 2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host = unquote(host) 3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realhost = host 3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, selector = url 3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check whether the proxy contains authorization information 3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_passwd, host = splituser(host) 3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # now we proceed with the url we want to obtain 3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urltype, rest = splittype(selector) 3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = rest 3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd = None 3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if urltype.lower() != 'http': 3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realhost = None 
3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realhost, rest = splithost(rest) 3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if realhost: 3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd, realhost = splituser(realhost) 3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if user_passwd: 3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao selector = "%s://%s%s" % (urltype, realhost, rest) 3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy_bypass(realhost): 3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host = realhost 3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #print "proxy via http:", host, selector 3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not host: raise IOError, ('http error', 'no host given') 3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy_passwd: 3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_passwd = unquote(proxy_passwd) 3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_auth = base64.b64encode(proxy_passwd).strip() 3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_auth = None 3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if user_passwd: 3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd = unquote(user_passwd) 3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao auth = base64.b64encode(user_passwd).strip() 3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao auth = None 3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h = httplib.HTTP(host) 3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is not None: 3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putrequest('POST', selector) 3370a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao h.putheader('Content-Type', 'application/x-www-form-urlencoded') 3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putheader('Content-Length', '%d' % len(data)) 3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putrequest('GET', selector) 3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if auth: h.putheader('Authorization', 'Basic %s' % auth) 3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if realhost: h.putheader('Host', realhost) 3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for args in self.addheaders: h.putheader(*args) 3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.endheaders(data) 3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errcode, errmsg, headers = h.getreply() 3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = h.getfile() 3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if errcode == -1: 3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fp: fp.close() 3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # something went wrong with the HTTP status line 3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('http protocol error', 0, 3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'got a bad status line', None) 3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # According to RFC 2616, "2xx" code indicates that the client's 3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # request was successfully received, understood, and accepted. 
3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if (200 <= errcode < 300): 3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(fp, headers, "http:" + url, errcode) 3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is None: 3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.http_error(url, fp, errcode, errmsg, headers) 3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.http_error(url, fp, errcode, errmsg, headers, data) 3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def http_error(self, url, fp, errcode, errmsg, headers, data=None): 3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Handle http errors. 3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Derived class can override this, or provide specific handlers 3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao named http_error_DDD where DDD is the 3-digit error code.""" 3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # First check if there's a specific handler for this error 3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = 'http_error_%d' % errcode 3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(self, name): 3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao method = getattr(self, name) 3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is None: 3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = method(url, fp, errcode, errmsg, headers) 3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = method(url, fp, errcode, errmsg, headers, data) 3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if result: return result 3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.http_error_default(url, fp, errcode, errmsg, headers) 3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def http_error_default(self, url, fp, errcode, errmsg, headers): 3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Default error handler: close the connection and raise IOError.""" 3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('http error', errcode, errmsg, headers) 3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _have_ssl: 3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_https(self, url, data=None): 3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use HTTPS protocol.""" 3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import httplib 3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd = None 3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_passwd = None 3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(url, str): 3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, selector = splithost(url) 3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if host: 3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd, host = splituser(host) 3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host = unquote(host) 3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realhost = host 3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, selector = url 3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # here, we determine, whether the proxy contains authorization information 3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_passwd, host = splituser(host) 4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urltype, rest = splittype(selector) 4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = rest 4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd = None 4030a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao if urltype.lower() != 'https': 4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realhost = None 4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realhost, rest = splithost(rest) 4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if realhost: 4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd, realhost = splituser(realhost) 4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if user_passwd: 4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao selector = "%s://%s%s" % (urltype, realhost, rest) 4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #print "proxy via https:", host, selector 4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not host: raise IOError, ('https error', 'no host given') 4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy_passwd: 4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_passwd = unquote(proxy_passwd) 4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_auth = base64.b64encode(proxy_passwd).strip() 4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_auth = None 4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if user_passwd: 4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user_passwd = unquote(user_passwd) 4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao auth = base64.b64encode(user_passwd).strip() 4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao auth = None 4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h = httplib.HTTPS(host, 0, 4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao key_file=self.key_file, 4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cert_file=self.cert_file) 4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is not None: 4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putrequest('POST', selector) 4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putheader('Content-Type', 
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'application/x-www-form-urlencoded') 4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putheader('Content-Length', '%d' % len(data)) 4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.putrequest('GET', selector) 4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if auth: h.putheader('Authorization', 'Basic %s' % auth) 4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if realhost: h.putheader('Host', realhost) 4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for args in self.addheaders: h.putheader(*args) 4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h.endheaders(data) 4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errcode, errmsg, headers = h.getreply() 4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = h.getfile() 4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if errcode == -1: 4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fp: fp.close() 4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # something went wrong with the HTTP status line 4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('http protocol error', 0, 4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'got a bad status line', None) 4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # According to RFC 2616, "2xx" code indicates that the client's 4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # request was successfully received, understood, and accepted. 
4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if (200 <= errcode < 300): 4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(fp, headers, "https:" + url, errcode) 4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data is None: 4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.http_error(url, fp, errcode, errmsg, headers) 4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.http_error(url, fp, errcode, errmsg, headers, 4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data) 4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_file(self, url): 4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use local file or FTP depending on form of URL.""" 4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(url, str): 4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('file error', 'proxy support for file protocol currently not implemented') 4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.open_ftp(url) 4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.open_local_file(url) 4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_local_file(self, url): 4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use local file.""" 4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import mimetypes, mimetools, email.utils 4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from cStringIO import StringIO 4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 4710a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao from StringIO import StringIO 4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, file = splithost(url) 4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao localname = url2pathname(file) 4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stats = os.stat(localname) 4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except OSError, e: 4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError(e.errno, e.strerror, e.filename) 4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao size = stats.st_size 4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao modified = email.utils.formatdate(stats.st_mtime, usegmt=True) 4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mtype = mimetypes.guess_type(url)[0] 4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers = mimetools.Message(StringIO( 4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (mtype or 'text/plain', size, modified))) 4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not host: 4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urlfile = file 4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file[:1] == '/': 4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urlfile = 'file://' + file 4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif file[:2] == './': 4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("local file url may start with / or file:. 
Unknown url of type: %s" % url) 4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(open(localname, 'rb'), 4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers, urlfile) 4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, port = splitport(host) 4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not port \ 4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao and socket.gethostbyname(host) in (localhost(), thishost()): 4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urlfile = file 4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file[:1] == '/': 4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao urlfile = 'file://' + file 4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(open(localname, 'rb'), 4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers, urlfile) 5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('local file error', 'not on local host') 5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_ftp(self, url): 5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use FTP protocol.""" 5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(url, str): 5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') 5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import mimetypes, mimetools 5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from cStringIO import StringIO 5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from StringIO import StringIO 5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, path = splithost(url) 5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not host: raise IOError, ('ftp error', 'no host given') 5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, port = splitport(host) 
5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user, host = splituser(host) 5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if user: user, passwd = splitpasswd(user) 5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: passwd = None 5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host = unquote(host) 5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao user = user or '' 5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao passwd = passwd or '' 5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host = socket.gethostbyname(host) 5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not port: 5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import ftplib 5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao port = ftplib.FTP_PORT 5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao port = int(port) 5260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path, attrs = splitattr(path) 5270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = unquote(path) 5280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dirs = path.split('/') 5290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dirs, file = dirs[:-1], dirs[-1] 5300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if dirs and not dirs[0]: dirs = dirs[1:] 5310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if dirs and not dirs[0]: dirs[0] = '/' 5320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao key = user, host, port, '/'.join(dirs) 5330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # XXX thread unsafe! 
5340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(self.ftpcache) > MAXFTPCACHE: 5350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Prune the cache, rather arbitrarily 5360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for k in self.ftpcache.keys(): 5370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if k != key: 5380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v = self.ftpcache[k] 5390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.ftpcache[k] 5400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v.close() 5410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 5420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not key in self.ftpcache: 5430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftpcache[key] = \ 5440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ftpwrapper(user, passwd, host, port, dirs) 5450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not file: type = 'D' 5460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: type = 'I' 5470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for attr in attrs: 5480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao attr, value = splitvalue(attr) 5490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if attr.lower() == 'type' and \ 5500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value in ('a', 'A', 'i', 'I', 'd', 'D'): 5510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = value.upper() 5520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (fp, retrlen) = self.ftpcache[key].retrfile(file, type) 5530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mtype = mimetypes.guess_type("ftp:" + url)[0] 5540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers = "" 5550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mtype: 5560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers += "Content-Type: %s\n" % mtype 5570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if retrlen is not None and retrlen >= 0: 5580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers += "Content-Length: %d\n" % retrlen 
5590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers = mimetools.Message(StringIO(headers)) 5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(fp, headers, "ftp:" + url) 5610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ftperrors(), msg: 5620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('ftp error', msg), sys.exc_info()[2] 5630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open_data(self, url, data=None): 5650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Use "data" URL.""" 5660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(url, str): 5670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('data error', 'proxy support for data protocol currently not implemented') 5680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ignore POSTed data 5690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 5700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # syntax of data URLs: 5710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 5720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # mediatype := [ type "/" subtype ] *( ";" parameter ) 5730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # data := *urlchar 5740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # parameter := attribute "=" value 5750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import mimetools 5760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 5770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from cStringIO import StringIO 5780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 5790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from StringIO import StringIO 5800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 5810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [type, data] = url.split(',', 1) 5820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ValueError: 5830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
raise IOError, ('data error', 'bad data URL') 5840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not type: 5850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = 'text/plain;charset=US-ASCII' 5860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao semi = type.rfind(';') 5870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if semi >= 0 and '=' not in type[semi:]: 5880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding = type[semi+1:] 5890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = type[:semi] 5900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 5910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding = '' 5920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg = [] 5930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 5940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao time.gmtime(time.time()))) 5950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg.append('Content-type: %s' % type) 5960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if encoding == 'base64': 5970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = base64.decodestring(data) 5980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = unquote(data) 6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg.append('Content-Length: %d' % len(data)) 6010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg.append('') 6020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg.append(data) 6030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao msg = '\n'.join(msg) 6040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao f = StringIO(msg) 6050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao headers = mimetools.Message(f, 0) 6060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #f.fileno = None # needed for addinfourl 6070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return addinfourl(f, headers, url) 6080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301/302/303/307) and Basic authentication
    retries (401/407) on top of URLopener.  Instance state:

    auth_cache -- maps 'realm@host' to a (user, passwd) tuple
    tries      -- number of redirects followed for the current request
    maxtries   -- redirect limit; a false value disables the limit
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect via redirect_internal().  Once self.tries
        reaches self.maxtries the request is reported as a 500 "Redirect
        Recursion" error through http_error_500 (if defined) or
        http_error_default.  The redirect counter is reset when this call
        finishes, whether it returns or raises.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset even when the redirect raised (disallowed scheme,
            # network failure, ...) so a failed request does not eat into
            # the redirect budget of later requests on this opener.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the 'location' (or 'uri') header.

        Returns None when neither header is present.  Raises IOError when
        the target is not an http, https or ftp URL.  The redirected
        request is issued without data.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def _basic_auth_realm(self, header_name, url, fp, errcode, errmsg,
                          headers):
        """Return the realm of a Basic challenge found in headers[header_name].

        If the header is missing, cannot be parsed, or names a scheme
        other than Basic, the error is passed to
        URLopener.http_error_default().
        """
        if header_name in headers:
            match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"',
                             headers[header_name])
            if match:
                scheme, realm = match.groups()
                if scheme.lower() == 'basic':
                    return realm
        URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_auth_realm('www-authenticate', url, fp,
                                       errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_auth_realm('proxy-authenticate', url, fp,
                                       errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth /
        # retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Store credentials in self.proxies[scheme] and reopen url.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # they evidently failed, so get_user_passwd is told to drop any
        # cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Reopen url with user:password@ credentials embedded in the host.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; they
        # evidently failed, so get_user_passwd is told to drop any cached
        # entry.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 407, adding proxy credentials."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 407, adding proxy credentials."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 401, adding credentials."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 401, adding credentials."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if needed.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is discarded and the user is prompted
        again.  A non-empty answer is stored in self.auth_cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for user name and password on the terminal and returns them
        as a tuple; returns (None, None) if interrupted with Ctrl-C.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    # Resolved once and cached in the module-level _localhost.
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
8180a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef thishost(): 8190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the IP address of the current host.""" 8200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _thishost 8210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _thishost is None: 8220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _thishost = socket.gethostbyname(socket.gethostname()) 8230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _thishost 8240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ftperrors = None 8260a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef ftperrors(): 8270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the set of errors raised by the FTP class.""" 8280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _ftperrors 8290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _ftperrors is None: 8300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import ftplib 8310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _ftperrors = ftplib.all_errors 8320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _ftperrors 8330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_noheaders = None 8350a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef noheaders(): 8360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return an empty mimetools.Message object.""" 8370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _noheaders 8380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _noheaders is None: 8390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import mimetools 8400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 8410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from cStringIO import StringIO 8420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 8430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from StringIO import StringIO 8440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _noheaders = 
mimetools.Message(StringIO(), 0) 8450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _noheaders.fp.close() # Recycle file descriptor 8460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _noheaders 8470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Utility classes 8500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8510a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ftpwrapper: 8520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class used by open_ftp() for cache of open FTP connections.""" 8530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, user, passwd, host, port, dirs, 8550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 8560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao persistent=True): 8570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.user = user 8580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.passwd = passwd 8590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.host = host 8600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.port = port 8610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.dirs = dirs 8620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.timeout = timeout 8630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.refcount = 0 8640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.keepalive = persistent 8650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.init() 8660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def init(self): 8680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import ftplib 8690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.busy = 0 8700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp = ftplib.FTP() 8710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.connect(self.host, self.port, self.timeout) 
8720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.login(self.user, self.passwd) 8730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for dir in self.dirs: 8740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.cwd(dir) 8750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def retrfile(self, file, type): 8770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import ftplib 8780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.endtransfer() 8790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 8800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: cmd = 'TYPE ' + type; isdir = 0 8810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 8820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.voidcmd(cmd) 8830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ftplib.all_errors: 8840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.init() 8850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.voidcmd(cmd) 8860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao conn = None 8870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file and not isdir: 8880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Try to retrieve as a file 8890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 8900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cmd = 'RETR ' + file 8910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao conn, retrlen = self.ftp.ntransfercmd(cmd) 8920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ftplib.error_perm, reason: 8930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if str(reason)[:3] != '550': 8940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('ftp error', reason), sys.exc_info()[2] 8950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not conn: 8960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Set transfer mode to ASCII! 
8970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.voidcmd('TYPE A') 8980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Try a directory listing. Verify that directory exists. 8990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file: 9000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pwd = self.ftp.pwd() 9010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 9020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 9030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.cwd(file) 9040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ftplib.error_perm, reason: 9050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError, ('ftp error', reason), sys.exc_info()[2] 9060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 9070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.cwd(pwd) 9080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cmd = 'LIST ' + file 9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cmd = 'LIST' 9110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao conn, retrlen = self.ftp.ntransfercmd(cmd) 9120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.busy = 1 9130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ftpobj = addclosehook(conn.makefile('rb'), self.file_close) 9140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.refcount += 1 9150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao conn.close() 9160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Pass back both a suitably decorated object and a retrieval length 9170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (ftpobj, retrlen) 9180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def endtransfer(self): 9200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self.busy: 9210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 9220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.busy = 0 9230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 
9240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.voidresp() 9250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ftperrors(): 9260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 9270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 9290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.keepalive = False 9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.refcount <= 0: 9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.real_close() 9320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def file_close(self): 9340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.endtransfer() 9350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.refcount -= 1 9360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.refcount <= 0 and not self.keepalive: 9370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.real_close() 9380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def real_close(self): 9400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.endtransfer() 9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ftp.close() 9430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ftperrors(): 9440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 9450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9460a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addbase: 9470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Base class for addinfo and addclosehook.""" 9480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, fp): 9500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = fp 9510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.read = self.fp.read 9520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.readline = self.fp.readline 
9530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines 9540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(self.fp, "fileno"): 9550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileno = self.fp.fileno 9560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileno = lambda: None 9580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(self.fp, "__iter__"): 9590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.__iter__ = self.fp.__iter__ 9600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(self.fp, "next"): 9610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.next = self.fp.next 9620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __repr__(self): 9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return '<%s at %r whose fp = %r>' % (self.__class__.__name__, 9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao id(self), self.fp) 9660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 9680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.read = None 9690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.readline = None 9700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.readlines = None 9710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileno = None 9720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.fp: self.fp.close() 9730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = None 9740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9750a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addclosehook(addbase): 9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class to add a close hook to an open file.""" 9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, fp, closehook, *hookargs): 
9790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addbase.__init__(self, fp) 9800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closehook = closehook 9810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.hookargs = hookargs 9820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 9840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.closehook: 9850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closehook(*self.hookargs) 9860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closehook = None 9870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.hookargs = None 9880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addbase.close(self) 9890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9900a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addinfo(addbase): 9910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """class to add an info() method to an open file.""" 9920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, fp, headers): 9940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addbase.__init__(self, fp) 9950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.headers = headers 9960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def info(self): 9980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.headers 9990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10000a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass addinfourl(addbase): 10010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """class to add info() and geturl() methods to an open file.""" 10020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, fp, headers, url, code=None): 10040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addbase.__init__(self, fp) 10050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.headers = headers 
10060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.url = url 10070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.code = code 10080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def info(self): 10100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.headers 10110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getcode(self): 10130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.code 10140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def geturl(self): 10160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.url 10170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Utilities to parse URLs (most of these return None for missing parts): 10200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# unwrap('<URL:type://host/path>') --> 'type://host/path' 10210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splittype('type:opaquestring') --> 'type', 'opaquestring' 10220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splithost('//host[:port]/path') --> 'host[:port]', '/path' 10230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' 10240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitpasswd('user:passwd') -> 'user', 'passwd' 10250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitport('host:port') --> 'host', 'port' 10260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitquery('/path?query') --> '/path', 'query' 10270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splittag('/path#tag') --> '/path', 'tag' 10280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitattr('/path;attr1=value1;attr2=value2;...') -> 10290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# '/path', ['attr1=value1', 'attr2=value2', ...] 
10300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splitvalue('attr=value') --> 'attr', 'value' 10310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# unquote('abc%20def') -> 'abc def' 10320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# quote('abc def') -> 'abc%20def') 10330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10340a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry: 10350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao unicode 10360a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept NameError: 10370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _is_unicode(x): 10380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 0 10390a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse: 10400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _is_unicode(x): 10410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return isinstance(x, unicode) 10420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10430a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef toBytes(url): 10440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """toBytes(u"URL") --> 'URL'.""" 10450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Most URL schemes require ASCII. 
If that changes, the conversion 10460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # can be relaxed 10470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _is_unicode(url): 10480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 10490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = url.encode("ASCII") 10500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except UnicodeError: 10510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise UnicodeError("URL " + repr(url) + 10520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao " contains non-ASCII characters") 10530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return url 10540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10550a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef unwrap(url): 10560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 10570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = url.strip() 10580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if url[:1] == '<' and url[-1:] == '>': 10590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao url = url[1:-1].strip() 10600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if url[:4] == 'URL:': url = url[4:].strip() 10610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return url 10620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_typeprog = None 10640a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splittype(url): 10650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 10660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _typeprog 10670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _typeprog is None: 10680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 10690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _typeprog = re.compile('^([^/:]+):') 10700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _typeprog.match(url) 
10720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: 10730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao scheme = match.group(1) 10740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return scheme.lower(), url[len(scheme) + 1:] 10750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None, url 10760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_hostprog = None 10780a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splithost(url): 10790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 10800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _hostprog 10810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _hostprog is None: 10820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 10830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _hostprog = re.compile('^//([^/?]*)(.*)$') 10840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _hostprog.match(url) 10860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: 10870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host_port = match.group(1) 10880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = match.group(2) 10890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path and not path.startswith('/'): 10900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = '/' + path 10910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return host_port, path 10920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None, url 10930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_userprog = None 10950a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splituser(host): 10960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 10970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _userprog 
10980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _userprog is None: 10990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 11000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _userprog = re.compile('^(.*)@(.*)$') 11010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _userprog.match(host) 11030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: return match.group(1, 2) 11040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None, host 11050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_passwdprog = None 11070a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitpasswd(user): 11080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 11090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _passwdprog 11100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _passwdprog is None: 11110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 11120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _passwdprog = re.compile('^([^:]*):(.*)$',re.S) 11130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _passwdprog.match(user) 11150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: return match.group(1, 2) 11160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return user, None 11170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# splittag('/path#tag') --> '/path', 'tag' 11190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_portprog = None 11200a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitport(host): 11210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splitport('host:port') --> 'host', 'port'.""" 11220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _portprog 11230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _portprog is None: 11240a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao import re 11250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _portprog = re.compile('^(.*):([0-9]+)$') 11260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _portprog.match(host) 11280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: return match.group(1, 2) 11290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return host, None 11300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_nportprog = None 11320a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitnport(host, defport=-1): 11330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Split host and port, returning numeric port. 11340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Return given default port if no ':' found; defaults to -1. 11350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Return numerical port if a valid number are found after ':'. 11360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Return None if ':' but not a valid number.""" 11370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _nportprog 11380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _nportprog is None: 11390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 11400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _nportprog = re.compile('^(.*):(.*)$') 11410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _nportprog.match(host) 11430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: 11440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host, port = match.group(1, 2) 11450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 11460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not port: raise ValueError, "no digits" 11470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao nport = int(port) 11480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ValueError: 11490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao nport = None 
11500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return host, nport 11510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return host, defport 11520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_queryprog = None 11540a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splitquery(url): 11550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splitquery('/path?query') --> '/path', 'query'.""" 11560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _queryprog 11570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _queryprog is None: 11580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 11590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _queryprog = re.compile('^(.*)\?([^?]*)$') 11600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _queryprog.match(url) 11620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: return match.group(1, 2) 11630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return url, None 11640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_tagprog = None 11660a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef splittag(url): 11670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """splittag('/path#tag') --> '/path', 'tag'.""" 11680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao global _tagprog 11690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _tagprog is None: 11700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 11710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _tagprog = re.compile('^(.*)#([^#]*)$') 11720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = _tagprog.match(url) 11740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if match: return match.group(1, 2) 11750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return url, None 11760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
def splitattr(url):
    """Split ';'-separated attributes off a path.

    splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].
    """
    pieces = url.split(';')
    path, attrs = pieces[0], pieces[1:]
    return path, attrs

_valueprog = None
def splitvalue(attr):
    """Split 'attr=value' at the first '=' --> 'attr', 'value'.

    Returns (attr, None) when the pattern does not match.
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1), found.group(2)

# urlparse contains a duplicate of this method to avoid a circular import.  If
# you update this method, also update the copy in urlparse.  This code
# duplication does not exist in Python3.

# Lookup table mapping every two-character hex string (any mix of upper and
# lower case digits) to the character it encodes, e.g. '2F' -> '/'.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)
# Splits a unicode string into alternating non-ASCII / ASCII runs.
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' sequence whose XX is two hex digits is replaced by the
    corresponding character.  A '%' that is not followed by two hex
    digits is left in the output unchanged.

    For unicode input only the ASCII runs are unquoted (the decoded
    bytes are interpreted as latin1); non-ASCII runs are passed through
    untouched.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Because the pattern has a capturing group, split() yields
        # [non-ascii, ascii, non-ascii, ascii, ...]: odd indexes are the
        # ASCII runs that may contain escapes.
        bits = _asciire.split(s)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return ''.join(res)

    bits = s.split('%')
    # fastpath
    if len(bits) == 1:
        return s
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            append(_hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back and keep the text as is.
            append('%')
            append(item)
    return ''.join(res)

def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first replaced by a space.
    """
    s = s.replace('+', ' ')
    return unquote(s)

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Maps each of the 256 byte characters either to itself (if always safe) or
# to its '%XX' escape.
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache of (quoter, safe-character-string) pairs, keyed by
# (safe, always_safe).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    The safe argument lists additional characters that must not be
    quoted.  An empty s is returned unchanged; passing None raises
    TypeError.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    # always_safe is part of the key so that rebinding the module-level
    # always_safe does not reuse quoters built for the old value.
    cachekey = (safe, always_safe)
    try:
        (quoter, safe) = _safe_quoters[cachekey]
    except KeyError:
        safe_map = _safe_map.copy()
        safe_map.update([(c, c) for c in safe])
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    # If stripping safe characters leaves nothing, s needs no quoting.
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))

def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Unlike quote(), '/' is not safe by default.
    """
    if ' ' in s:
        # Keep spaces unescaped through quote() so they can be turned into
        # '+' afterwards; a literal '+' in s is escaped as '%2B' unless it
        # is listed in safe.
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)

Gaodef urlencode(query, doseq=0): 12920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Encode a sequence of two-element tuples or dictionary into a URL query string. 12930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If any values in the query arg are sequences and doseq is true, each 12950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sequence element is converted to a separate parameter. 12960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If the query arg is a sequence of two-element tuples, the order of the 12980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parameters in the output will match the order of parameters in the 12990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao input. 13000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(query,"items"): 13030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # mapping objects 13040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao query = query.items() 13050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 13060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # it's a bother at times that strings and string-like objects are 13070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # sequences... 
13080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 13090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # non-sequence items should not work with len() 13100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # non-empty strings will fail this 13110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(query) and not isinstance(query[0], tuple): 13120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError 13130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # zero-length sequences of all types will get here and succeed, 13140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # but that's a minor nit - since the original implementation 13150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # allowed empty dicts that type of behavior probably should be 13160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # preserved for consistency 13170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except TypeError: 13180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ty,va,tb = sys.exc_info() 13190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError, "not a valid non-string sequence or mapping object", tb 13200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = [] 13220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not doseq: 13230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # preserve old behavior 13240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for k, v in query: 13250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao k = quote_plus(str(k)) 13260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v = quote_plus(str(v)) 13270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(k + '=' + v) 13280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 13290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for k, v in query: 13300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao k = quote_plus(str(k)) 13310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(v, str): 13320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v = 
quote_plus(v) 13330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(k + '=' + v) 13340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif _is_unicode(v): 13350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # is there a reasonable way to convert to ASCII? 13360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # encode generates a string, but "replace" or "ignore" 13370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # lose information and "strict" can raise UnicodeError 13380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v = quote_plus(v.encode("ASCII","replace")) 13390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(k + '=' + v) 13400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 13410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 13420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # is this a sufficient test for sequence-ness? 13430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao len(v) 13440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except TypeError: 13450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # not a sequence 13460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v = quote_plus(str(v)) 13470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(k + '=' + v) 13480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 13490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # loop over the sequence 13500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for elt in v: 13510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(k + '=' + quote_plus(str(elt))) 13520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return '&'.join(l) 13530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Proxy handling 13550a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef getproxies_environment(): 13560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 
13570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Scan the environment for variables named <scheme>_proxy; 13590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao this seems to be the standard convention. If you need a 13600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao different way, you can pass a proxies dictionary to the 13610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [Fancy]URLopener constructor. 13620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies = {} 13650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for name, value in os.environ.items(): 13660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = name.lower() 13670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if value and name[-6:] == '_proxy': 13680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies[name[:-6]] = value 13690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxies 13700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13710a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef proxy_bypass_environment(host): 13720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Test if proxies should not be used for a particular host. 13730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Checks the environment for a variable named no_proxy, which should 13750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao be a list of DNS suffixes separated by commas, or '*' for all hosts. 
13760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 13780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # '*' is special case for always bypass 13790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if no_proxy == '*': 13800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 1 13810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # strip port off host 13820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao hostonly, port = splitport(host) 13830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check if the host ends with any of the DNS suffixes 13840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] 13850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for name in no_proxy_list: 13860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if name and (hostonly.endswith(name) or host.endswith(name)): 13870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 1 13880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # otherwise, don't bypass 13890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 0 13900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13920a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoif sys.platform == 'darwin': 13930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from _scproxy import _get_proxy_settings, _get_proxies 13940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def proxy_bypass_macosx_sysconf(host): 13960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Return True iff this host shouldn't be accessed using a proxy 13980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao This function uses the MacOSX framework SystemConfiguration 14000a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao to fetch the proxy information. 14010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 14030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import socket 14040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from fnmatch import fnmatch 14050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao hostonly, port = splitport(host) 14070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def ip2num(ipAddr): 14090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parts = ipAddr.split('.') 14100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parts = map(int, parts) 14110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(parts) != 4: 14120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parts = (parts + [0, 0, 0, 0])[:4] 14130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 14140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_settings = _get_proxy_settings() 14160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Check for simple host names: 14180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if '.' 
not in host: 14190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxy_settings['exclude_simple']: 14200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return True 14210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao hostIP = None 14230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for value in proxy_settings.get('exceptions', ()): 14250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Items in the list are strings like these: *.local, 169.254/16 14260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not value: continue 14270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 14290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if m is not None: 14300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hostIP is None: 14310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 14320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao hostIP = socket.gethostbyname(hostonly) 14330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao hostIP = ip2num(hostIP) 14340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except socket.error: 14350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 14360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao base = ip2num(m.group(1)) 14380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mask = m.group(2) 14390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mask is None: 14400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mask = 8 * (m.group(1).count('.') + 1) 14410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mask = int(mask[1:]) 14440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mask = 32 - mask 14450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if 
(hostIP >> mask) == (base >> mask): 14470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return True 14480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif fnmatch(host, value): 14500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return True 14510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return False 14530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getproxies_macosx_sysconf(): 14550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 14560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao This function uses the MacOSX framework SystemConfiguration 14580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao to fetch the proxy information. 14590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _get_proxies() 14610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def proxy_bypass(host): 14630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if getproxies_environment(): 14640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxy_bypass_environment(host) 14650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxy_bypass_macosx_sysconf(host) 14670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getproxies(): 14690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return getproxies_environment() or getproxies_macosx_sysconf() 14700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14710a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelif os.name == 'nt': 14720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getproxies_registry(): 14730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a 
dictionary of scheme -> proxy server URL mappings. 14740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Win32 uses the registry to store proxies. 14760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies = {} 14790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 14800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import _winreg 14810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 14820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Std module, so should be around - but you never know! 14830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxies 14840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 14850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 14860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 14870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxyEnable = _winreg.QueryValueEx(internetSettings, 14880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'ProxyEnable')[0] 14890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxyEnable: 14900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Returned as Unicode but problems if not converted to ASCII 14910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxyServer = str(_winreg.QueryValueEx(internetSettings, 14920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'ProxyServer')[0]) 14930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if '=' in proxyServer: 14940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Per-protocol settings 14950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for p in proxyServer.split(';'): 14960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao protocol, address = p.split('=', 1) 14970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # See if address has a type:// prefix 
14980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 14990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not re.match('^([^/:]+)://', address): 15000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao address = '%s://%s' % (protocol, address) 15010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies[protocol] = address 15020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 15030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Use one setting for all protocols 15040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if proxyServer[:5] == 'http:': 15050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies['http'] = proxyServer 15060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 15070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies['http'] = 'http://%s' % proxyServer 15080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies['https'] = 'https://%s' % proxyServer 15090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxies['ftp'] = 'ftp://%s' % proxyServer 15100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao internetSettings.Close() 15110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except (WindowsError, ValueError, TypeError): 15120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Either registry key not found etc, or the value in an 15130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # unexpected format. 15140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # proxies already set up to be empty so nothing to do 15150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 15160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxies 15170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getproxies(): 15190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 
15200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Returns settings gathered from the environment, if specified, 15220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao or the registry. 15230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 15250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return getproxies_environment() or getproxies_registry() 15260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def proxy_bypass_registry(host): 15280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 15290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import _winreg 15300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import re 15310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 15320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Std modules, so should be around - but you never know! 15330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 0 15340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 15350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 15360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 15370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxyEnable = _winreg.QueryValueEx(internetSettings, 15380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'ProxyEnable')[0] 15390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxyOverride = str(_winreg.QueryValueEx(internetSettings, 15400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'ProxyOverride')[0]) 15410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ^^^^ Returned as Unicode but problems if not converted to ASCII 15420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except WindowsError: 15430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 0 15440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not proxyEnable or not 
proxyOverride: 15450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 0 15460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # try to make a host list from name and IP address. 15470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao rawHost, port = splitport(host) 15480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host = [rawHost] 15490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 15500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addr = socket.gethostbyname(rawHost) 15510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if addr != rawHost: 15520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host.append(addr) 15530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except socket.error: 15540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 15550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 15560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fqdn = socket.getfqdn(rawHost) 15570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fqdn != rawHost: 15580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao host.append(fqdn) 15590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except socket.error: 15600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 15610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # make a check value list from the registry entry: replace the 15620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # '<local>' string by the localhost entry and the corresponding 15630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # canonical entry. 15640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxyOverride = proxyOverride.split(';') 15650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # now check if we match one of the registry values. 15660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for test in proxyOverride: 15670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if test == '<local>': 15680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if '.' 
not in rawHost: 15690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 1 15700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao test = test.replace(".", r"\.") # mask dots 15710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao test = test.replace("*", r".*") # change glob sequence 15720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao test = test.replace("?", r".") # change glob char 15730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for val in host: 15740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # print "%s <--> %s" %( test, val ) 15750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if re.match(test, val, re.I): 15760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 1 15770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 0 15780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def proxy_bypass(host): 15800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a dictionary of scheme -> proxy server URL mappings. 15810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Returns settings gathered from the environment, if specified, 15830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao or the registry. 
15840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 15860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if getproxies_environment(): 15870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxy_bypass_environment(host) 15880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 15890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return proxy_bypass_registry(host) 15900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15910a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse: 15920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # By default use environment variables 15930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao getproxies = getproxies_environment 15940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao proxy_bypass = proxy_bypass_environment 15950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Test and time quote() and unquote() 15970a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef test1(): 15980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = '' 15990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for i in range(256): s = s + chr(i) 16000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = s*4 16010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t0 = time.time() 16020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao qs = quote(s) 16030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao uqs = unquote(qs) 16040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t1 = time.time() 16050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if uqs != s: 16060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print 'Wrong!' 
16070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print repr(s) 16080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print repr(qs) 16090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print repr(uqs) 16100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print round(t1 - t0, 3), 'sec' 16110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16130a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef reporthook(blocknum, blocksize, totalsize): 16140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Report during remote transfers 16150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Block number: %d, Block size: %d, Total size: %d" % ( 16160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocknum, blocksize, totalsize) 1617