1"""Open an arbitrary URL. 2 3See the following document for more info on URLs: 4"Names and Addresses, URIs, URLs, URNs, URCs", at 5http://www.w3.org/pub/WWW/Addressing/Overview.html 6 7See also the HTTP spec (from which the error codes are derived): 8"HTTP - Hypertext Transfer Protocol", at 9http://www.w3.org/pub/WWW/Protocols/ 10 11Related standards and specs: 12- RFC1808: the "relative URL" spec. (authoritative status) 13- RFC1738 - the "URL standard". (authoritative status) 14- RFC1630 - the "URI spec". (informational status) 15 16The object returned by URLopener().open(file) will differ per 17protocol. All you know is that is has methods read(), readline(), 18readlines(), fileno(), close() and info(). The read*(), fileno() 19and close() methods work like those of open files. 20The info() method returns a mimetools.Message object which can be 21used to query various info about the object, if available. 22(mimetools.Message objects are queried with the getheader() method.) 23""" 24 25import string 26import socket 27import os 28import time 29import sys 30import base64 31import re 32 33from urlparse import urljoin as basejoin 34 35__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", 36 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", 37 "urlencode", "url2pathname", "pathname2url", "splittag", 38 "localhost", "thishost", "ftperrors", "basejoin", "unwrap", 39 "splittype", "splithost", "splituser", "splitpasswd", "splitport", 40 "splitnport", "splitquery", "splitattr", "splitvalue", 41 "getproxies"] 42 43__version__ = '1.17' # XXX This version is not always updated :-( 44 45MAXFTPCACHE = 10 # Trim the ftp cache beyond this size 46 47# Helper for non-unix systems 48if os.name == 'nt': 49 from nturl2path import url2pathname, pathname2url 50elif os.name == 'riscos': 51 from rourl2path import url2pathname, pathname2url 52else: 53 def url2pathname(pathname): 54 """OS-specific conversion from a relative URL of the 'file' scheme 55 to a file 
system path; not recommended for general use.""" 56 return unquote(pathname) 57 58 def pathname2url(pathname): 59 """OS-specific conversion from a file system path to a relative URL 60 of the 'file' scheme; not recommended for general use.""" 61 return quote(pathname) 62 63# This really consists of two pieces: 64# (1) a class which handles opening of all sorts of URLs 65# (plus assorted utilities etc.) 66# (2) a set of functions for parsing URLs 67# XXX Should these be separated out into different modules? 68 69 70# Shortcut for basic usage 71_urlopener = None 72def urlopen(url, data=None, proxies=None): 73 """Create a file-like object for the specified URL to read from.""" 74 from warnings import warnpy3k 75 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " 76 "favor of urllib2.urlopen()", stacklevel=2) 77 78 global _urlopener 79 if proxies is not None: 80 opener = FancyURLopener(proxies=proxies) 81 elif not _urlopener: 82 opener = FancyURLopener() 83 _urlopener = opener 84 else: 85 opener = _urlopener 86 if data is None: 87 return opener.open(url) 88 else: 89 return opener.open(url, data) 90def urlretrieve(url, filename=None, reporthook=None, data=None): 91 global _urlopener 92 if not _urlopener: 93 _urlopener = FancyURLopener() 94 return _urlopener.retrieve(url, filename, reporthook, data) 95def urlcleanup(): 96 if _urlopener: 97 _urlopener.cleanup() 98 _safe_quoters.clear() 99 ftpcache.clear() 100 101# check for SSL 102try: 103 import ssl 104except: 105 _have_ssl = False 106else: 107 _have_ssl = True 108 109# exception raised when downloaded size does not match content-length 110class ContentTooShortError(IOError): 111 def __init__(self, message, content): 112 IOError.__init__(self, message) 113 self.content = content 114 115ftpcache = {} 116class URLopener: 117 """Class to open URLs. 118 This is a class rather than just a subroutine because we may need 119 more than one set of global protocol-specific options. 
120 Note -- this is a base class for those who don't want the 121 automatic handling of errors type 302 (relocated) and 401 122 (authorization needed).""" 123 124 __tempfiles = None 125 126 version = "Python-urllib/%s" % __version__ 127 128 # Constructor 129 def __init__(self, proxies=None, **x509): 130 if proxies is None: 131 proxies = getproxies() 132 assert hasattr(proxies, 'has_key'), "proxies must be a mapping" 133 self.proxies = proxies 134 self.key_file = x509.get('key_file') 135 self.cert_file = x509.get('cert_file') 136 self.addheaders = [('User-Agent', self.version)] 137 self.__tempfiles = [] 138 self.__unlink = os.unlink # See cleanup() 139 self.tempcache = None 140 # Undocumented feature: if you assign {} to tempcache, 141 # it is used to cache files retrieved with 142 # self.retrieve(). This is not enabled by default 143 # since it does not work for changing documents (and I 144 # haven't got the logic to check expiration headers 145 # yet). 146 self.ftpcache = ftpcache 147 # Undocumented feature: you can use a different 148 # ftp cache by assigning to the .ftpcache member; 149 # in case you want logically independent URL openers 150 # XXX This is not threadsafe. Bah. 151 152 def __del__(self): 153 self.close() 154 155 def close(self): 156 self.cleanup() 157 158 def cleanup(self): 159 # This code sometimes runs when the rest of this module 160 # has already been deleted, so it can't use any globals 161 # or import anything. 162 if self.__tempfiles: 163 for file in self.__tempfiles: 164 try: 165 self.__unlink(file) 166 except OSError: 167 pass 168 del self.__tempfiles[:] 169 if self.tempcache: 170 self.tempcache.clear() 171 172 def addheader(self, *args): 173 """Add a header to be used by the HTTP interface only 174 e.g. 
u.addheader('Accept', 'sound/basic')""" 175 self.addheaders.append(args) 176 177 # External interface 178 def open(self, fullurl, data=None): 179 """Use URLopener().open(file) instead of open(file, 'r').""" 180 fullurl = unwrap(toBytes(fullurl)) 181 # percent encode url, fixing lame server errors for e.g, like space 182 # within url paths. 183 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") 184 if self.tempcache and fullurl in self.tempcache: 185 filename, headers = self.tempcache[fullurl] 186 fp = open(filename, 'rb') 187 return addinfourl(fp, headers, fullurl) 188 urltype, url = splittype(fullurl) 189 if not urltype: 190 urltype = 'file' 191 if urltype in self.proxies: 192 proxy = self.proxies[urltype] 193 urltype, proxyhost = splittype(proxy) 194 host, selector = splithost(proxyhost) 195 url = (host, fullurl) # Signal special case to open_*() 196 else: 197 proxy = None 198 name = 'open_' + urltype 199 self.type = urltype 200 name = name.replace('-', '_') 201 if not hasattr(self, name): 202 if proxy: 203 return self.open_unknown_proxy(proxy, fullurl, data) 204 else: 205 return self.open_unknown(fullurl, data) 206 try: 207 if data is None: 208 return getattr(self, name)(url) 209 else: 210 return getattr(self, name)(url, data) 211 except socket.error, msg: 212 raise IOError, ('socket error', msg), sys.exc_info()[2] 213 214 def open_unknown(self, fullurl, data=None): 215 """Overridable interface to open unknown URL type.""" 216 type, url = splittype(fullurl) 217 raise IOError, ('url error', 'unknown url type', type) 218 219 def open_unknown_proxy(self, proxy, fullurl, data=None): 220 """Overridable interface to open unknown URL type.""" 221 type, url = splittype(fullurl) 222 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) 223 224 # External interface 225 def retrieve(self, url, filename=None, reporthook=None, data=None): 226 """retrieve(url) returns (filename, headers) for a local object 227 or (tempfilename, headers) for a remote 
object.""" 228 url = unwrap(toBytes(url)) 229 if self.tempcache and url in self.tempcache: 230 return self.tempcache[url] 231 type, url1 = splittype(url) 232 if filename is None and (not type or type == 'file'): 233 try: 234 fp = self.open_local_file(url1) 235 hdrs = fp.info() 236 fp.close() 237 return url2pathname(splithost(url1)[1]), hdrs 238 except IOError: 239 pass 240 fp = self.open(url, data) 241 try: 242 headers = fp.info() 243 if filename: 244 tfp = open(filename, 'wb') 245 else: 246 import tempfile 247 garbage, path = splittype(url) 248 garbage, path = splithost(path or "") 249 path, garbage = splitquery(path or "") 250 path, garbage = splitattr(path or "") 251 suffix = os.path.splitext(path)[1] 252 (fd, filename) = tempfile.mkstemp(suffix) 253 self.__tempfiles.append(filename) 254 tfp = os.fdopen(fd, 'wb') 255 try: 256 result = filename, headers 257 if self.tempcache is not None: 258 self.tempcache[url] = result 259 bs = 1024*8 260 size = -1 261 read = 0 262 blocknum = 0 263 if "content-length" in headers: 264 size = int(headers["Content-Length"]) 265 if reporthook: 266 reporthook(blocknum, bs, size) 267 while 1: 268 block = fp.read(bs) 269 if block == "": 270 break 271 read += len(block) 272 tfp.write(block) 273 blocknum += 1 274 if reporthook: 275 reporthook(blocknum, bs, size) 276 finally: 277 tfp.close() 278 finally: 279 fp.close() 280 281 # raise exception if actual size does not match content-length header 282 if size >= 0 and read < size: 283 raise ContentTooShortError("retrieval incomplete: got only %i out " 284 "of %i bytes" % (read, size), result) 285 286 return result 287 288 # Each method named open_<type> knows how to open that type of URL 289 290 def open_http(self, url, data=None): 291 """Use HTTP protocol.""" 292 import httplib 293 user_passwd = None 294 proxy_passwd= None 295 if isinstance(url, str): 296 host, selector = splithost(url) 297 if host: 298 user_passwd, host = splituser(host) 299 host = unquote(host) 300 realhost = host 301 
else: 302 host, selector = url 303 # check whether the proxy contains authorization information 304 proxy_passwd, host = splituser(host) 305 # now we proceed with the url we want to obtain 306 urltype, rest = splittype(selector) 307 url = rest 308 user_passwd = None 309 if urltype.lower() != 'http': 310 realhost = None 311 else: 312 realhost, rest = splithost(rest) 313 if realhost: 314 user_passwd, realhost = splituser(realhost) 315 if user_passwd: 316 selector = "%s://%s%s" % (urltype, realhost, rest) 317 if proxy_bypass(realhost): 318 host = realhost 319 320 #print "proxy via http:", host, selector 321 if not host: raise IOError, ('http error', 'no host given') 322 323 if proxy_passwd: 324 proxy_passwd = unquote(proxy_passwd) 325 proxy_auth = base64.b64encode(proxy_passwd).strip() 326 else: 327 proxy_auth = None 328 329 if user_passwd: 330 user_passwd = unquote(user_passwd) 331 auth = base64.b64encode(user_passwd).strip() 332 else: 333 auth = None 334 h = httplib.HTTP(host) 335 if data is not None: 336 h.putrequest('POST', selector) 337 h.putheader('Content-Type', 'application/x-www-form-urlencoded') 338 h.putheader('Content-Length', '%d' % len(data)) 339 else: 340 h.putrequest('GET', selector) 341 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 342 if auth: h.putheader('Authorization', 'Basic %s' % auth) 343 if realhost: h.putheader('Host', realhost) 344 for args in self.addheaders: h.putheader(*args) 345 h.endheaders(data) 346 errcode, errmsg, headers = h.getreply() 347 fp = h.getfile() 348 if errcode == -1: 349 if fp: fp.close() 350 # something went wrong with the HTTP status line 351 raise IOError, ('http protocol error', 0, 352 'got a bad status line', None) 353 # According to RFC 2616, "2xx" code indicates that the client's 354 # request was successfully received, understood, and accepted. 
355 if (200 <= errcode < 300): 356 return addinfourl(fp, headers, "http:" + url, errcode) 357 else: 358 if data is None: 359 return self.http_error(url, fp, errcode, errmsg, headers) 360 else: 361 return self.http_error(url, fp, errcode, errmsg, headers, data) 362 363 def http_error(self, url, fp, errcode, errmsg, headers, data=None): 364 """Handle http errors. 365 Derived class can override this, or provide specific handlers 366 named http_error_DDD where DDD is the 3-digit error code.""" 367 # First check if there's a specific handler for this error 368 name = 'http_error_%d' % errcode 369 if hasattr(self, name): 370 method = getattr(self, name) 371 if data is None: 372 result = method(url, fp, errcode, errmsg, headers) 373 else: 374 result = method(url, fp, errcode, errmsg, headers, data) 375 if result: return result 376 return self.http_error_default(url, fp, errcode, errmsg, headers) 377 378 def http_error_default(self, url, fp, errcode, errmsg, headers): 379 """Default error handler: close the connection and raise IOError.""" 380 fp.close() 381 raise IOError, ('http error', errcode, errmsg, headers) 382 383 if _have_ssl: 384 def open_https(self, url, data=None): 385 """Use HTTPS protocol.""" 386 387 import httplib 388 user_passwd = None 389 proxy_passwd = None 390 if isinstance(url, str): 391 host, selector = splithost(url) 392 if host: 393 user_passwd, host = splituser(host) 394 host = unquote(host) 395 realhost = host 396 else: 397 host, selector = url 398 # here, we determine, whether the proxy contains authorization information 399 proxy_passwd, host = splituser(host) 400 urltype, rest = splittype(selector) 401 url = rest 402 user_passwd = None 403 if urltype.lower() != 'https': 404 realhost = None 405 else: 406 realhost, rest = splithost(rest) 407 if realhost: 408 user_passwd, realhost = splituser(realhost) 409 if user_passwd: 410 selector = "%s://%s%s" % (urltype, realhost, rest) 411 #print "proxy via https:", host, selector 412 if not host: raise 
IOError, ('https error', 'no host given') 413 if proxy_passwd: 414 proxy_passwd = unquote(proxy_passwd) 415 proxy_auth = base64.b64encode(proxy_passwd).strip() 416 else: 417 proxy_auth = None 418 if user_passwd: 419 user_passwd = unquote(user_passwd) 420 auth = base64.b64encode(user_passwd).strip() 421 else: 422 auth = None 423 h = httplib.HTTPS(host, 0, 424 key_file=self.key_file, 425 cert_file=self.cert_file) 426 if data is not None: 427 h.putrequest('POST', selector) 428 h.putheader('Content-Type', 429 'application/x-www-form-urlencoded') 430 h.putheader('Content-Length', '%d' % len(data)) 431 else: 432 h.putrequest('GET', selector) 433 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 434 if auth: h.putheader('Authorization', 'Basic %s' % auth) 435 if realhost: h.putheader('Host', realhost) 436 for args in self.addheaders: h.putheader(*args) 437 h.endheaders(data) 438 errcode, errmsg, headers = h.getreply() 439 fp = h.getfile() 440 if errcode == -1: 441 if fp: fp.close() 442 # something went wrong with the HTTP status line 443 raise IOError, ('http protocol error', 0, 444 'got a bad status line', None) 445 # According to RFC 2616, "2xx" code indicates that the client's 446 # request was successfully received, understood, and accepted. 
447 if (200 <= errcode < 300): 448 return addinfourl(fp, headers, "https:" + url, errcode) 449 else: 450 if data is None: 451 return self.http_error(url, fp, errcode, errmsg, headers) 452 else: 453 return self.http_error(url, fp, errcode, errmsg, headers, 454 data) 455 456 def open_file(self, url): 457 """Use local file or FTP depending on form of URL.""" 458 if not isinstance(url, str): 459 raise IOError, ('file error', 'proxy support for file protocol currently not implemented') 460 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 461 return self.open_ftp(url) 462 else: 463 return self.open_local_file(url) 464 465 def open_local_file(self, url): 466 """Use local file.""" 467 import mimetypes, mimetools, email.utils 468 try: 469 from cStringIO import StringIO 470 except ImportError: 471 from StringIO import StringIO 472 host, file = splithost(url) 473 localname = url2pathname(file) 474 try: 475 stats = os.stat(localname) 476 except OSError, e: 477 raise IOError(e.errno, e.strerror, e.filename) 478 size = stats.st_size 479 modified = email.utils.formatdate(stats.st_mtime, usegmt=True) 480 mtype = mimetypes.guess_type(url)[0] 481 headers = mimetools.Message(StringIO( 482 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 483 (mtype or 'text/plain', size, modified))) 484 if not host: 485 urlfile = file 486 if file[:1] == '/': 487 urlfile = 'file://' + file 488 elif file[:2] == './': 489 raise ValueError("local file url may start with / or file:. 
Unknown url of type: %s" % url) 490 return addinfourl(open(localname, 'rb'), 491 headers, urlfile) 492 host, port = splitport(host) 493 if not port \ 494 and socket.gethostbyname(host) in (localhost(), thishost()): 495 urlfile = file 496 if file[:1] == '/': 497 urlfile = 'file://' + file 498 return addinfourl(open(localname, 'rb'), 499 headers, urlfile) 500 raise IOError, ('local file error', 'not on local host') 501 502 def open_ftp(self, url): 503 """Use FTP protocol.""" 504 if not isinstance(url, str): 505 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') 506 import mimetypes, mimetools 507 try: 508 from cStringIO import StringIO 509 except ImportError: 510 from StringIO import StringIO 511 host, path = splithost(url) 512 if not host: raise IOError, ('ftp error', 'no host given') 513 host, port = splitport(host) 514 user, host = splituser(host) 515 if user: user, passwd = splitpasswd(user) 516 else: passwd = None 517 host = unquote(host) 518 user = user or '' 519 passwd = passwd or '' 520 host = socket.gethostbyname(host) 521 if not port: 522 import ftplib 523 port = ftplib.FTP_PORT 524 else: 525 port = int(port) 526 path, attrs = splitattr(path) 527 path = unquote(path) 528 dirs = path.split('/') 529 dirs, file = dirs[:-1], dirs[-1] 530 if dirs and not dirs[0]: dirs = dirs[1:] 531 if dirs and not dirs[0]: dirs[0] = '/' 532 key = user, host, port, '/'.join(dirs) 533 # XXX thread unsafe! 
534 if len(self.ftpcache) > MAXFTPCACHE: 535 # Prune the cache, rather arbitrarily 536 for k in self.ftpcache.keys(): 537 if k != key: 538 v = self.ftpcache[k] 539 del self.ftpcache[k] 540 v.close() 541 try: 542 if not key in self.ftpcache: 543 self.ftpcache[key] = \ 544 ftpwrapper(user, passwd, host, port, dirs) 545 if not file: type = 'D' 546 else: type = 'I' 547 for attr in attrs: 548 attr, value = splitvalue(attr) 549 if attr.lower() == 'type' and \ 550 value in ('a', 'A', 'i', 'I', 'd', 'D'): 551 type = value.upper() 552 (fp, retrlen) = self.ftpcache[key].retrfile(file, type) 553 mtype = mimetypes.guess_type("ftp:" + url)[0] 554 headers = "" 555 if mtype: 556 headers += "Content-Type: %s\n" % mtype 557 if retrlen is not None and retrlen >= 0: 558 headers += "Content-Length: %d\n" % retrlen 559 headers = mimetools.Message(StringIO(headers)) 560 return addinfourl(fp, headers, "ftp:" + url) 561 except ftperrors(), msg: 562 raise IOError, ('ftp error', msg), sys.exc_info()[2] 563 564 def open_data(self, url, data=None): 565 """Use "data" URL.""" 566 if not isinstance(url, str): 567 raise IOError, ('data error', 'proxy support for data protocol currently not implemented') 568 # ignore POSTed data 569 # 570 # syntax of data URLs: 571 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 572 # mediatype := [ type "/" subtype ] *( ";" parameter ) 573 # data := *urlchar 574 # parameter := attribute "=" value 575 import mimetools 576 try: 577 from cStringIO import StringIO 578 except ImportError: 579 from StringIO import StringIO 580 try: 581 [type, data] = url.split(',', 1) 582 except ValueError: 583 raise IOError, ('data error', 'bad data URL') 584 if not type: 585 type = 'text/plain;charset=US-ASCII' 586 semi = type.rfind(';') 587 if semi >= 0 and '=' not in type[semi:]: 588 encoding = type[semi+1:] 589 type = type[:semi] 590 else: 591 encoding = '' 592 msg = [] 593 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 594 time.gmtime(time.time()))) 
595 msg.append('Content-type: %s' % type) 596 if encoding == 'base64': 597 data = base64.decodestring(data) 598 else: 599 data = unquote(data) 600 msg.append('Content-Length: %d' % len(data)) 601 msg.append('') 602 msg.append(data) 603 msg = '\n'.join(msg) 604 f = StringIO(msg) 605 headers = mimetools.Message(f, 0) 606 #f.fileno = None # needed for addinfourl 607 return addinfourl(f, headers, url) 608 609 610class FancyURLopener(URLopener): 611 """Derived class with handlers for errors we can handle (perhaps).""" 612 613 def __init__(self, *args, **kwargs): 614 URLopener.__init__(self, *args, **kwargs) 615 self.auth_cache = {} 616 self.tries = 0 617 self.maxtries = 10 618 619 def http_error_default(self, url, fp, errcode, errmsg, headers): 620 """Default error handling -- don't raise an exception.""" 621 return addinfourl(fp, headers, "http:" + url, errcode) 622 623 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): 624 """Error 302 -- relocated (temporarily).""" 625 self.tries += 1 626 if self.maxtries and self.tries >= self.maxtries: 627 if hasattr(self, "http_error_500"): 628 meth = self.http_error_500 629 else: 630 meth = self.http_error_default 631 self.tries = 0 632 return meth(url, fp, 500, 633 "Internal Server Error: Redirect Recursion", headers) 634 result = self.redirect_internal(url, fp, errcode, errmsg, headers, 635 data) 636 self.tries = 0 637 return result 638 639 def redirect_internal(self, url, fp, errcode, errmsg, headers, data): 640 if 'location' in headers: 641 newurl = headers['location'] 642 elif 'uri' in headers: 643 newurl = headers['uri'] 644 else: 645 return 646 fp.close() 647 # In case the server sent a relative URL, join with original: 648 newurl = basejoin(self.type + ":" + url, newurl) 649 650 # For security reasons we do not allow redirects to protocols 651 # other than HTTP, HTTPS or FTP. 
652 newurl_lower = newurl.lower() 653 if not (newurl_lower.startswith('http://') or 654 newurl_lower.startswith('https://') or 655 newurl_lower.startswith('ftp://')): 656 raise IOError('redirect error', errcode, 657 errmsg + " - Redirection to url '%s' is not allowed" % 658 newurl, 659 headers) 660 661 return self.open(newurl) 662 663 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 664 """Error 301 -- also relocated (permanently).""" 665 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 666 667 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): 668 """Error 303 -- also relocated (essentially identical to 302).""" 669 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 670 671 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): 672 """Error 307 -- relocated, but turn POST into error.""" 673 if data is None: 674 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 675 else: 676 return self.http_error_default(url, fp, errcode, errmsg, headers) 677 678 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): 679 """Error 401 -- authentication required. 
680 This function supports Basic authentication only.""" 681 if not 'www-authenticate' in headers: 682 URLopener.http_error_default(self, url, fp, 683 errcode, errmsg, headers) 684 stuff = headers['www-authenticate'] 685 import re 686 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 687 if not match: 688 URLopener.http_error_default(self, url, fp, 689 errcode, errmsg, headers) 690 scheme, realm = match.groups() 691 if scheme.lower() != 'basic': 692 URLopener.http_error_default(self, url, fp, 693 errcode, errmsg, headers) 694 name = 'retry_' + self.type + '_basic_auth' 695 if data is None: 696 return getattr(self,name)(url, realm) 697 else: 698 return getattr(self,name)(url, realm, data) 699 700 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): 701 """Error 407 -- proxy authentication required. 702 This function supports Basic authentication only.""" 703 if not 'proxy-authenticate' in headers: 704 URLopener.http_error_default(self, url, fp, 705 errcode, errmsg, headers) 706 stuff = headers['proxy-authenticate'] 707 import re 708 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 709 if not match: 710 URLopener.http_error_default(self, url, fp, 711 errcode, errmsg, headers) 712 scheme, realm = match.groups() 713 if scheme.lower() != 'basic': 714 URLopener.http_error_default(self, url, fp, 715 errcode, errmsg, headers) 716 name = 'retry_proxy_' + self.type + '_basic_auth' 717 if data is None: 718 return getattr(self,name)(url, realm) 719 else: 720 return getattr(self,name)(url, realm, data) 721 722 def retry_proxy_http_basic_auth(self, url, realm, data=None): 723 host, selector = splithost(url) 724 newurl = 'http://' + host + selector 725 proxy = self.proxies['http'] 726 urltype, proxyhost = splittype(proxy) 727 proxyhost, proxyselector = splithost(proxyhost) 728 i = proxyhost.find('@') + 1 729 proxyhost = proxyhost[i:] 730 user, passwd = self.get_user_passwd(proxyhost, realm, i) 731 if not (user or passwd): return 
None 732 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost 733 self.proxies['http'] = 'http://' + proxyhost + proxyselector 734 if data is None: 735 return self.open(newurl) 736 else: 737 return self.open(newurl, data) 738 739 def retry_proxy_https_basic_auth(self, url, realm, data=None): 740 host, selector = splithost(url) 741 newurl = 'https://' + host + selector 742 proxy = self.proxies['https'] 743 urltype, proxyhost = splittype(proxy) 744 proxyhost, proxyselector = splithost(proxyhost) 745 i = proxyhost.find('@') + 1 746 proxyhost = proxyhost[i:] 747 user, passwd = self.get_user_passwd(proxyhost, realm, i) 748 if not (user or passwd): return None 749 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost 750 self.proxies['https'] = 'https://' + proxyhost + proxyselector 751 if data is None: 752 return self.open(newurl) 753 else: 754 return self.open(newurl, data) 755 756 def retry_http_basic_auth(self, url, realm, data=None): 757 host, selector = splithost(url) 758 i = host.find('@') + 1 759 host = host[i:] 760 user, passwd = self.get_user_passwd(host, realm, i) 761 if not (user or passwd): return None 762 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 763 newurl = 'http://' + host + selector 764 if data is None: 765 return self.open(newurl) 766 else: 767 return self.open(newurl, data) 768 769 def retry_https_basic_auth(self, url, realm, data=None): 770 host, selector = splithost(url) 771 i = host.find('@') + 1 772 host = host[i:] 773 user, passwd = self.get_user_passwd(host, realm, i) 774 if not (user or passwd): return None 775 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 776 newurl = 'https://' + host + selector 777 if data is None: 778 return self.open(newurl) 779 else: 780 return self.open(newurl, data) 781 782 def get_user_passwd(self, host, realm, clear_cache=0): 783 key = realm + '@' + host.lower() 784 if key in self.auth_cache: 785 if 
clear_cache: 786 del self.auth_cache[key] 787 else: 788 return self.auth_cache[key] 789 user, passwd = self.prompt_user_passwd(host, realm) 790 if user or passwd: self.auth_cache[key] = (user, passwd) 791 return user, passwd 792 793 def prompt_user_passwd(self, host, realm): 794 """Override this in a GUI environment!""" 795 import getpass 796 try: 797 user = raw_input("Enter username for %s at %s: " % (realm, 798 host)) 799 passwd = getpass.getpass("Enter password for %s in %s at %s: " % 800 (user, realm, host)) 801 return user, passwd 802 except KeyboardInterrupt: 803 print 804 return None, None 805 806 807# Utility functions 808 809_localhost = None 810def localhost(): 811 """Return the IP address of the magic hostname 'localhost'.""" 812 global _localhost 813 if _localhost is None: 814 _localhost = socket.gethostbyname('localhost') 815 return _localhost 816 817_thishost = None 818def thishost(): 819 """Return the IP address of the current host.""" 820 global _thishost 821 if _thishost is None: 822 _thishost = socket.gethostbyname(socket.gethostname()) 823 return _thishost 824 825_ftperrors = None 826def ftperrors(): 827 """Return the set of errors raised by the FTP class.""" 828 global _ftperrors 829 if _ftperrors is None: 830 import ftplib 831 _ftperrors = ftplib.all_errors 832 return _ftperrors 833 834_noheaders = None 835def noheaders(): 836 """Return an empty mimetools.Message object.""" 837 global _noheaders 838 if _noheaders is None: 839 import mimetools 840 try: 841 from cStringIO import StringIO 842 except ImportError: 843 from StringIO import StringIO 844 _noheaders = mimetools.Message(StringIO(), 0) 845 _noheaders.fp.close() # Recycle file descriptor 846 return _noheaders 847 848 849# Utility classes 850 851class ftpwrapper: 852 """Class used by open_ftp() for cache of open FTP connections.""" 853 854 def __init__(self, user, passwd, host, port, dirs, 855 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 856 persistent=True): 857 self.user = user 858 self.passwd 
= passwd 859 self.host = host 860 self.port = port 861 self.dirs = dirs 862 self.timeout = timeout 863 self.refcount = 0 864 self.keepalive = persistent 865 self.init() 866 867 def init(self): 868 import ftplib 869 self.busy = 0 870 self.ftp = ftplib.FTP() 871 self.ftp.connect(self.host, self.port, self.timeout) 872 self.ftp.login(self.user, self.passwd) 873 for dir in self.dirs: 874 self.ftp.cwd(dir) 875 876 def retrfile(self, file, type): 877 import ftplib 878 self.endtransfer() 879 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 880 else: cmd = 'TYPE ' + type; isdir = 0 881 try: 882 self.ftp.voidcmd(cmd) 883 except ftplib.all_errors: 884 self.init() 885 self.ftp.voidcmd(cmd) 886 conn = None 887 if file and not isdir: 888 # Try to retrieve as a file 889 try: 890 cmd = 'RETR ' + file 891 conn, retrlen = self.ftp.ntransfercmd(cmd) 892 except ftplib.error_perm, reason: 893 if str(reason)[:3] != '550': 894 raise IOError, ('ftp error', reason), sys.exc_info()[2] 895 if not conn: 896 # Set transfer mode to ASCII! 897 self.ftp.voidcmd('TYPE A') 898 # Try a directory listing. Verify that directory exists. 
899 if file: 900 pwd = self.ftp.pwd() 901 try: 902 try: 903 self.ftp.cwd(file) 904 except ftplib.error_perm, reason: 905 raise IOError, ('ftp error', reason), sys.exc_info()[2] 906 finally: 907 self.ftp.cwd(pwd) 908 cmd = 'LIST ' + file 909 else: 910 cmd = 'LIST' 911 conn, retrlen = self.ftp.ntransfercmd(cmd) 912 self.busy = 1 913 ftpobj = addclosehook(conn.makefile('rb'), self.file_close) 914 self.refcount += 1 915 conn.close() 916 # Pass back both a suitably decorated object and a retrieval length 917 return (ftpobj, retrlen) 918 919 def endtransfer(self): 920 if not self.busy: 921 return 922 self.busy = 0 923 try: 924 self.ftp.voidresp() 925 except ftperrors(): 926 pass 927 928 def close(self): 929 self.keepalive = False 930 if self.refcount <= 0: 931 self.real_close() 932 933 def file_close(self): 934 self.endtransfer() 935 self.refcount -= 1 936 if self.refcount <= 0 and not self.keepalive: 937 self.real_close() 938 939 def real_close(self): 940 self.endtransfer() 941 try: 942 self.ftp.close() 943 except ftperrors(): 944 pass 945 946class addbase: 947 """Base class for addinfo and addclosehook.""" 948 949 def __init__(self, fp): 950 self.fp = fp 951 self.read = self.fp.read 952 self.readline = self.fp.readline 953 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines 954 if hasattr(self.fp, "fileno"): 955 self.fileno = self.fp.fileno 956 else: 957 self.fileno = lambda: None 958 if hasattr(self.fp, "__iter__"): 959 self.__iter__ = self.fp.__iter__ 960 if hasattr(self.fp, "next"): 961 self.next = self.fp.next 962 963 def __repr__(self): 964 return '<%s at %r whose fp = %r>' % (self.__class__.__name__, 965 id(self), self.fp) 966 967 def close(self): 968 self.read = None 969 self.readline = None 970 self.readlines = None 971 self.fileno = None 972 if self.fp: self.fp.close() 973 self.fp = None 974 975class addclosehook(addbase): 976 """Class to add a close hook to an open file.""" 977 978 def __init__(self, fp, closehook, *hookargs): 979 
addbase.__init__(self, fp) 980 self.closehook = closehook 981 self.hookargs = hookargs 982 983 def close(self): 984 if self.closehook: 985 self.closehook(*self.hookargs) 986 self.closehook = None 987 self.hookargs = None 988 addbase.close(self) 989 990class addinfo(addbase): 991 """class to add an info() method to an open file.""" 992 993 def __init__(self, fp, headers): 994 addbase.__init__(self, fp) 995 self.headers = headers 996 997 def info(self): 998 return self.headers 999 1000class addinfourl(addbase): 1001 """class to add info() and geturl() methods to an open file.""" 1002 1003 def __init__(self, fp, headers, url, code=None): 1004 addbase.__init__(self, fp) 1005 self.headers = headers 1006 self.url = url 1007 self.code = code 1008 1009 def info(self): 1010 return self.headers 1011 1012 def getcode(self): 1013 return self.code 1014 1015 def geturl(self): 1016 return self.url 1017 1018 1019# Utilities to parse URLs (most of these return None for missing parts): 1020# unwrap('<URL:type://host/path>') --> 'type://host/path' 1021# splittype('type:opaquestring') --> 'type', 'opaquestring' 1022# splithost('//host[:port]/path') --> 'host[:port]', '/path' 1023# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' 1024# splitpasswd('user:passwd') -> 'user', 'passwd' 1025# splitport('host:port') --> 'host', 'port' 1026# splitquery('/path?query') --> '/path', 'query' 1027# splittag('/path#tag') --> '/path', 'tag' 1028# splitattr('/path;attr1=value1;attr2=value2;...') -> 1029# '/path', ['attr1=value1', 'attr2=value2', ...] 1030# splitvalue('attr=value') --> 'attr', 'value' 1031# unquote('abc%20def') -> 'abc def' 1032# quote('abc def') -> 'abc%20def') 1033 1034try: 1035 unicode 1036except NameError: 1037 def _is_unicode(x): 1038 return 0 1039else: 1040 def _is_unicode(x): 1041 return isinstance(x, unicode) 1042 1043def toBytes(url): 1044 """toBytes(u"URL") --> 'URL'.""" 1045 # Most URL schemes require ASCII. 
    # If that changes, the conversion can be relaxed.
    if _is_unicode(url):
        try:
            url = url.encode("ASCII")
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    # Strip one layer of <...> wrapping, then an optional 'URL:' prefix.
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:': url = url[4:].strip()
    return url

# Each splitter below lazily compiles its regex on first use and caches
# it in a module-level global.
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        # Scheme names are case-insensitive; normalise to lower case.
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        # Ensure a non-empty remainder always begins with a slash.
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url

_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        import re
        # Greedy '(.*)' splits on the *last* '@', so the userinfo part
        # may itself contain '@' characters.
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host

_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        import re
        # Split on the *first* ':'; re.S lets the password span newlines.
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None

# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        import re
        # Only an all-digit port is split off; anything else leaves the
        # host string untouched.
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match: return match.group(1, 2)
    return host, None

_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            # An empty port ('host:') is treated like a non-numeric one.
            if not port: raise ValueError, "no digits"
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport

_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        import re
        # Greedy '(.*)' splits on the *last* '?'.
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None

_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        import re
        # Greedy '(.*)' splits on the *last* '#'.
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        import re
        # Split on the *first* '=' only.
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None

# urlparse contains a duplicate of this method to avoid a circular import. If
# you update this method, also update the copy in urlparse. This code
# duplication does not exist in Python3.

# Table mapping every two-hex-digit string (in any letter case) to the
# corresponding byte, so unquote() can decode escapes with one dict lookup.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Split into alternating ASCII / non-ASCII runs; only ASCII runs
        # can contain %xx escapes, so unquote those as byte strings and
        # decode the result back to unicode via latin-1.
        bits = _asciire.split(s)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return ''.join(res)

    bits = s.split('%')
    # fastpath
    if len(bits) == 1:
        return s
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            append(_hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            # Not a valid two-hex-digit escape: keep the literal '%'.
            append('%')
            append(item)
    return ''.join(res)

def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # '+' encodes a space in query strings; translate before unquoting.
    s = s.replace('+', ' ')
    return unquote(s)

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Map every byte to itself when always safe, else to its %XX escape.
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache keyed on (safe, always_safe): (quoter callable, full safe string).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    try:
        (quoter, safe) = _safe_quoters[cachekey]
    except KeyError:
        # First use of this 'safe' set: extend the base map so the safe
        # characters map to themselves, and cache the result.
        safe_map = _safe_map.copy()
        safe_map.update([(c, c) for c in safe])
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    # If stripping every safe character empties the string, nothing
    # needs quoting and s can be returned unchanged.
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))

def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        # Treat the space as safe during quoting, then encode it as '+'.
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)

def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with the original traceback attached.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified whole, even
        # if it is itself a sequence.
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence: one k=elt pair per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)

# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        # Accept any '<scheme>_proxy' variable that has a non-empty value.
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies

def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
    for name in no_proxy_list:
        # Match either the bare host or host:port against each suffix.
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0


if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Convert dotted-quad text to a 32-bit integer; short forms
            # are padded on the right with zero octets.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                # Numeric exception, possibly with a /prefix length:
                # compare the host's resolved address against the network.
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit mask: one full octet per dotted component.
                    mask = 8 * (m.group(1).count('.') + 1)

                else:
                    mask = int(mask[1:])
                # Convert prefix length to a right-shift count.
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        # Environment settings, when present, take precedence over the
        # system configuration.
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for plain (dotless) host names.
                if '.' not in rawHost:
                    return 1
            # Translate the registry glob into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        # Environment settings take precedence over the registry.
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment

# Test and time quote() and unquote()
def test1():
    # Round-trip all 256 byte values (repeated 4x) through quote() and
    # unquote(), report mismatches, and print the elapsed time.
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print repr(s)
    print repr(qs)
    print repr(uqs)
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)