urllib.py revision bcd833f30f77160e321056fa548d76e2abe26701
1"""Open an arbitrary URL. 2 3See the following document for more info on URLs: 4"Names and Addresses, URIs, URLs, URNs, URCs", at 5http://www.w3.org/pub/WWW/Addressing/Overview.html 6 7See also the HTTP spec (from which the error codes are derived): 8"HTTP - Hypertext Transfer Protocol", at 9http://www.w3.org/pub/WWW/Protocols/ 10 11Related standards and specs: 12- RFC1808: the "relative URL" spec. (authoritative status) 13- RFC1738 - the "URL standard". (authoritative status) 14- RFC1630 - the "URI spec". (informational status) 15 16The object returned by URLopener().open(file) will differ per 17protocol. All you know is that is has methods read(), readline(), 18readlines(), fileno(), close() and info(). The read*(), fileno() 19and close() methods work like those of open files. 20The info() method returns a mimetools.Message object which can be 21used to query various info about the object, if available. 22(mimetools.Message objects are queried with the getheader() method.) 23""" 24 25import string 26import socket 27import os 28import time 29import sys 30import base64 31 32from urlparse import urljoin as basejoin 33 34__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", 35 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", 36 "urlencode", "url2pathname", "pathname2url", "splittag", 37 "localhost", "thishost", "ftperrors", "basejoin", "unwrap", 38 "splittype", "splithost", "splituser", "splitpasswd", "splitport", 39 "splitnport", "splitquery", "splitattr", "splitvalue", 40 "getproxies"] 41 42__version__ = '1.17' # XXX This version is not always updated :-( 43 44MAXFTPCACHE = 10 # Trim the ftp cache beyond this size 45 46# Helper for non-unix systems 47if os.name == 'nt': 48 from nturl2path import url2pathname, pathname2url 49elif os.name == 'riscos': 50 from rourl2path import url2pathname, pathname2url 51else: 52 def url2pathname(pathname): 53 """OS-specific conversion from a relative URL of the 'file' scheme 54 to a file system path; not recommended for general use.""" 55 return unquote(pathname) 56 57 def pathname2url(pathname): 58 """OS-specific conversion from a file system path to a relative URL 59 of the 'file' scheme; not recommended for general use.""" 60 return quote(pathname) 61 62# This really consists of two pieces: 63# (1) a class which handles opening of all sorts of URLs 64# (plus assorted utilities etc.) 65# (2) a set of functions for parsing URLs 66# XXX Should these be separated out into different modules? 67 68 69# Shortcut for basic usage 70_urlopener = None 71def urlopen(url, data=None, proxies=None): 72 """Create a file-like object for the specified URL to read from.""" 73 from warnings import warnpy3k 74 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " 75 "favor of urllib2.urlopen()", stacklevel=2) 76 77 global _urlopener 78 if proxies is not None: 79 opener = FancyURLopener(proxies=proxies) 80 elif not _urlopener: 81 opener = FancyURLopener() 82 _urlopener = opener 83 else: 84 opener = _urlopener 85 if data is None: 86 return opener.open(url) 87 else: 88 return opener.open(url, data) 89def urlretrieve(url, filename=None, reporthook=None, data=None): 90 global _urlopener 91 if not _urlopener: 92 _urlopener = FancyURLopener() 93 return _urlopener.retrieve(url, filename, reporthook, data) 94def urlcleanup(): 95 if _urlopener: 96 _urlopener.cleanup() 97 _safe_quoters.clear() 98 ftpcache.clear() 99 100# check for SSL 101try: 102 import ssl 103except: 104 _have_ssl = False 105else: 106 _have_ssl = True 107 108# exception raised when downloaded size does not match content-length 109class ContentTooShortError(IOError): 110 def __init__(self, message, content): 111 IOError.__init__(self, message) 112 self.content = content 113 114ftpcache = {} 115class URLopener: 116 """Class to open URLs. 117 This is a class rather than just a subroutine because we may need 118 more than one set of global protocol-specific options. 119 Note -- this is a base class for those who don't want the 120 automatic handling of errors type 302 (relocated) and 401 121 (authorization needed).""" 122 123 __tempfiles = None 124 125 version = "Python-urllib/%s" % __version__ 126 127 # Constructor 128 def __init__(self, proxies=None, **x509): 129 if proxies is None: 130 proxies = getproxies() 131 assert hasattr(proxies, 'has_key'), "proxies must be a mapping" 132 self.proxies = proxies 133 self.key_file = x509.get('key_file') 134 self.cert_file = x509.get('cert_file') 135 self.addheaders = [('User-Agent', self.version)] 136 self.__tempfiles = [] 137 self.__unlink = os.unlink # See cleanup() 138 self.tempcache = None 139 # Undocumented feature: if you assign {} to tempcache, 140 # it is used to cache files retrieved with 141 # self.retrieve(). This is not enabled by default 142 # since it does not work for changing documents (and I 143 # haven't got the logic to check expiration headers 144 # yet). 145 self.ftpcache = ftpcache 146 # Undocumented feature: you can use a different 147 # ftp cache by assigning to the .ftpcache member; 148 # in case you want logically independent URL openers 149 # XXX This is not threadsafe. Bah. 150 151 def __del__(self): 152 self.close() 153 154 def close(self): 155 self.cleanup() 156 157 def cleanup(self): 158 # This code sometimes runs when the rest of this module 159 # has already been deleted, so it can't use any globals 160 # or import anything. 161 if self.__tempfiles: 162 for file in self.__tempfiles: 163 try: 164 self.__unlink(file) 165 except OSError: 166 pass 167 del self.__tempfiles[:] 168 if self.tempcache: 169 self.tempcache.clear() 170 171 def addheader(self, *args): 172 """Add a header to be used by the HTTP interface only 173 e.g. u.addheader('Accept', 'sound/basic')""" 174 self.addheaders.append(args) 175 176 # External interface 177 def open(self, fullurl, data=None): 178 """Use URLopener().open(file) instead of open(file, 'r').""" 179 fullurl = unwrap(toBytes(fullurl)) 180 # percent encode url, fixing lame server errors for e.g, like space 181 # within url paths. 182 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") 183 if self.tempcache and fullurl in self.tempcache: 184 filename, headers = self.tempcache[fullurl] 185 fp = open(filename, 'rb') 186 return addinfourl(fp, headers, fullurl) 187 urltype, url = splittype(fullurl) 188 if not urltype: 189 urltype = 'file' 190 if urltype in self.proxies: 191 proxy = self.proxies[urltype] 192 urltype, proxyhost = splittype(proxy) 193 host, selector = splithost(proxyhost) 194 url = (host, fullurl) # Signal special case to open_*() 195 else: 196 proxy = None 197 name = 'open_' + urltype 198 self.type = urltype 199 name = name.replace('-', '_') 200 if not hasattr(self, name): 201 if proxy: 202 return self.open_unknown_proxy(proxy, fullurl, data) 203 else: 204 return self.open_unknown(fullurl, data) 205 try: 206 if data is None: 207 return getattr(self, name)(url) 208 else: 209 return getattr(self, name)(url, data) 210 except socket.error, msg: 211 raise IOError, ('socket error', msg), sys.exc_info()[2] 212 213 def open_unknown(self, fullurl, data=None): 214 """Overridable interface to open unknown URL type.""" 215 type, url = splittype(fullurl) 216 raise IOError, ('url error', 'unknown url type', type) 217 218 def open_unknown_proxy(self, proxy, fullurl, data=None): 219 """Overridable interface to open unknown URL type.""" 220 type, url = splittype(fullurl) 221 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) 222 223 # External interface 224 def retrieve(self, url, filename=None, reporthook=None, data=None): 225 """retrieve(url) returns (filename, headers) for a local object 226 or (tempfilename, headers) for a remote object.""" 227 url = unwrap(toBytes(url)) 228 if self.tempcache and url in self.tempcache: 229 return self.tempcache[url] 230 type, url1 = splittype(url) 231 if filename is None and (not type or type == 'file'): 232 try: 233 fp = self.open_local_file(url1) 234 hdrs = fp.info() 235 fp.close() 236 return url2pathname(splithost(url1)[1]), hdrs 237 except IOError: 238 pass 239 fp = self.open(url, data) 240 try: 241 headers = fp.info() 242 if filename: 243 tfp = open(filename, 'wb') 244 else: 245 import tempfile 246 garbage, path = splittype(url) 247 garbage, path = splithost(path or "") 248 path, garbage = splitquery(path or "") 249 path, garbage = splitattr(path or "") 250 suffix = os.path.splitext(path)[1] 251 (fd, filename) = tempfile.mkstemp(suffix) 252 self.__tempfiles.append(filename) 253 tfp = os.fdopen(fd, 'wb') 254 try: 255 result = filename, headers 256 if self.tempcache is not None: 257 self.tempcache[url] = result 258 bs = 1024*8 259 size = -1 260 read = 0 261 blocknum = 0 262 if "content-length" in headers: 263 size = int(headers["Content-Length"]) 264 if reporthook: 265 reporthook(blocknum, bs, size) 266 while 1: 267 block = fp.read(bs) 268 if block == "": 269 break 270 read += len(block) 271 tfp.write(block) 272 blocknum += 1 273 if reporthook: 274 reporthook(blocknum, bs, size) 275 finally: 276 tfp.close() 277 finally: 278 fp.close() 279 280 # raise exception if actual size does not match content-length header 281 if size >= 0 and read < size: 282 raise ContentTooShortError("retrieval incomplete: got only %i out " 283 "of %i bytes" % (read, size), result) 284 285 return result 286 287 # Each method named open_<type> knows how to open that type of URL 288 289 def open_http(self, url, data=None): 290 """Use HTTP protocol.""" 291 import httplib 292 user_passwd = None 293 proxy_passwd= None 294 if isinstance(url, str): 295 host, selector = splithost(url) 296 if host: 297 user_passwd, host = splituser(host) 298 host = unquote(host) 299 realhost = host 300 else: 301 host, selector = url 302 # check whether the proxy contains authorization information 303 proxy_passwd, host = splituser(host) 304 # now we proceed with the url we want to obtain 305 urltype, rest = splittype(selector) 306 url = rest 307 user_passwd = None 308 if urltype.lower() != 'http': 309 realhost = None 310 else: 311 realhost, rest = splithost(rest) 312 if realhost: 313 user_passwd, realhost = splituser(realhost) 314 if user_passwd: 315 selector = "%s://%s%s" % (urltype, realhost, rest) 316 if proxy_bypass(realhost): 317 host = realhost 318 319 #print "proxy via http:", host, selector 320 if not host: raise IOError, ('http error', 'no host given') 321 322 if proxy_passwd: 323 proxy_passwd = unquote(proxy_passwd) 324 proxy_auth = base64.b64encode(proxy_passwd).strip() 325 else: 326 proxy_auth = None 327 328 if user_passwd: 329 user_passwd = unquote(user_passwd) 330 auth = base64.b64encode(user_passwd).strip() 331 else: 332 auth = None 333 h = httplib.HTTP(host) 334 if data is not None: 335 h.putrequest('POST', selector) 336 h.putheader('Content-Type', 'application/x-www-form-urlencoded') 337 h.putheader('Content-Length', '%d' % len(data)) 338 else: 339 h.putrequest('GET', selector) 340 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 341 if auth: h.putheader('Authorization', 'Basic %s' % auth) 342 if realhost: h.putheader('Host', realhost) 343 for args in self.addheaders: h.putheader(*args) 344 h.endheaders(data) 345 errcode, errmsg, headers = h.getreply() 346 fp = h.getfile() 347 if errcode == -1: 348 if fp: fp.close() 349 # something went wrong with the HTTP status line 350 raise IOError, ('http protocol error', 0, 351 'got a bad status line', None) 352 # According to RFC 2616, "2xx" code indicates that the client's 353 # request was successfully received, understood, and accepted. 354 if (200 <= errcode < 300): 355 return addinfourl(fp, headers, "http:" + url, errcode) 356 else: 357 if data is None: 358 return self.http_error(url, fp, errcode, errmsg, headers) 359 else: 360 return self.http_error(url, fp, errcode, errmsg, headers, data) 361 362 def http_error(self, url, fp, errcode, errmsg, headers, data=None): 363 """Handle http errors. 364 Derived class can override this, or provide specific handlers 365 named http_error_DDD where DDD is the 3-digit error code.""" 366 # First check if there's a specific handler for this error 367 name = 'http_error_%d' % errcode 368 if hasattr(self, name): 369 method = getattr(self, name) 370 if data is None: 371 result = method(url, fp, errcode, errmsg, headers) 372 else: 373 result = method(url, fp, errcode, errmsg, headers, data) 374 if result: return result 375 return self.http_error_default(url, fp, errcode, errmsg, headers) 376 377 def http_error_default(self, url, fp, errcode, errmsg, headers): 378 """Default error handler: close the connection and raise IOError.""" 379 fp.close() 380 raise IOError, ('http error', errcode, errmsg, headers) 381 382 if _have_ssl: 383 def open_https(self, url, data=None): 384 """Use HTTPS protocol.""" 385 386 import httplib 387 user_passwd = None 388 proxy_passwd = None 389 if isinstance(url, str): 390 host, selector = splithost(url) 391 if host: 392 user_passwd, host = splituser(host) 393 host = unquote(host) 394 realhost = host 395 else: 396 host, selector = url 397 # here, we determine, whether the proxy contains authorization information 398 proxy_passwd, host = splituser(host) 399 urltype, rest = splittype(selector) 400 url = rest 401 user_passwd = None 402 if urltype.lower() != 'https': 403 realhost = None 404 else: 405 realhost, rest = splithost(rest) 406 if realhost: 407 user_passwd, realhost = splituser(realhost) 408 if user_passwd: 409 selector = "%s://%s%s" % (urltype, realhost, rest) 410 #print "proxy via https:", host, selector 411 if not host: raise IOError, ('https error', 'no host given') 412 if proxy_passwd: 413 proxy_passwd = unquote(proxy_passwd) 414 proxy_auth = base64.b64encode(proxy_passwd).strip() 415 else: 416 proxy_auth = None 417 if user_passwd: 418 user_passwd = unquote(user_passwd) 419 auth = base64.b64encode(user_passwd).strip() 420 else: 421 auth = None 422 h = httplib.HTTPS(host, 0, 423 key_file=self.key_file, 424 cert_file=self.cert_file) 425 if data is not None: 426 h.putrequest('POST', selector) 427 h.putheader('Content-Type', 428 'application/x-www-form-urlencoded') 429 h.putheader('Content-Length', '%d' % len(data)) 430 else: 431 h.putrequest('GET', selector) 432 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 433 if auth: h.putheader('Authorization', 'Basic %s' % auth) 434 if realhost: h.putheader('Host', realhost) 435 for args in self.addheaders: h.putheader(*args) 436 h.endheaders(data) 437 errcode, errmsg, headers = h.getreply() 438 fp = h.getfile() 439 if errcode == -1: 440 if fp: fp.close() 441 # something went wrong with the HTTP status line 442 raise IOError, ('http protocol error', 0, 443 'got a bad status line', None) 444 # According to RFC 2616, "2xx" code indicates that the client's 445 # request was successfully received, understood, and accepted. 446 if (200 <= errcode < 300): 447 return addinfourl(fp, headers, "https:" + url, errcode) 448 else: 449 if data is None: 450 return self.http_error(url, fp, errcode, errmsg, headers) 451 else: 452 return self.http_error(url, fp, errcode, errmsg, headers, 453 data) 454 455 def open_file(self, url): 456 """Use local file or FTP depending on form of URL.""" 457 if not isinstance(url, str): 458 raise IOError, ('file error', 'proxy support for file protocol currently not implemented') 459 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 460 return self.open_ftp(url) 461 else: 462 return self.open_local_file(url) 463 464 def open_local_file(self, url): 465 """Use local file.""" 466 import mimetypes, mimetools, email.utils 467 try: 468 from cStringIO import StringIO 469 except ImportError: 470 from StringIO import StringIO 471 host, file = splithost(url) 472 localname = url2pathname(file) 473 try: 474 stats = os.stat(localname) 475 except OSError, e: 476 raise IOError(e.errno, e.strerror, e.filename) 477 size = stats.st_size 478 modified = email.utils.formatdate(stats.st_mtime, usegmt=True) 479 mtype = mimetypes.guess_type(url)[0] 480 headers = mimetools.Message(StringIO( 481 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 482 (mtype or 'text/plain', size, modified))) 483 if not host: 484 urlfile = file 485 if file[:1] == '/': 486 urlfile = 'file://' + file 487 return addinfourl(open(localname, 'rb'), 488 headers, urlfile) 489 host, port = splitport(host) 490 if not port \ 491 and socket.gethostbyname(host) in (localhost(), thishost()): 492 urlfile = file 493 if file[:1] == '/': 494 urlfile = 'file://' + file 495 return addinfourl(open(localname, 'rb'), 496 headers, urlfile) 497 raise IOError, ('local file error', 'not on local host') 498 499 def open_ftp(self, url): 500 """Use FTP protocol.""" 501 if not isinstance(url, str): 502 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') 503 import mimetypes, mimetools 504 try: 505 from cStringIO import StringIO 506 except ImportError: 507 from StringIO import StringIO 508 host, path = splithost(url) 509 if not host: raise IOError, ('ftp error', 'no host given') 510 host, port = splitport(host) 511 user, host = splituser(host) 512 if user: user, passwd = splitpasswd(user) 513 else: passwd = None 514 host = unquote(host) 515 user = user or '' 516 passwd = passwd or '' 517 host = socket.gethostbyname(host) 518 if not port: 519 import ftplib 520 port = ftplib.FTP_PORT 521 else: 522 port = int(port) 523 path, attrs = splitattr(path) 524 path = unquote(path) 525 dirs = path.split('/') 526 dirs, file = dirs[:-1], dirs[-1] 527 if dirs and not dirs[0]: dirs = dirs[1:] 528 if dirs and not dirs[0]: dirs[0] = '/' 529 key = user, host, port, '/'.join(dirs) 530 # XXX thread unsafe! 531 if len(self.ftpcache) > MAXFTPCACHE: 532 # Prune the cache, rather arbitrarily 533 for k in self.ftpcache.keys(): 534 if k != key: 535 v = self.ftpcache[k] 536 del self.ftpcache[k] 537 v.close() 538 try: 539 if not key in self.ftpcache: 540 self.ftpcache[key] = \ 541 ftpwrapper(user, passwd, host, port, dirs) 542 if not file: type = 'D' 543 else: type = 'I' 544 for attr in attrs: 545 attr, value = splitvalue(attr) 546 if attr.lower() == 'type' and \ 547 value in ('a', 'A', 'i', 'I', 'd', 'D'): 548 type = value.upper() 549 (fp, retrlen) = self.ftpcache[key].retrfile(file, type) 550 mtype = mimetypes.guess_type("ftp:" + url)[0] 551 headers = "" 552 if mtype: 553 headers += "Content-Type: %s\n" % mtype 554 if retrlen is not None and retrlen >= 0: 555 headers += "Content-Length: %d\n" % retrlen 556 headers = mimetools.Message(StringIO(headers)) 557 return addinfourl(fp, headers, "ftp:" + url) 558 except ftperrors(), msg: 559 raise IOError, ('ftp error', msg), sys.exc_info()[2] 560 561 def open_data(self, url, data=None): 562 """Use "data" URL.""" 563 if not isinstance(url, str): 564 raise IOError, ('data error', 'proxy support for data protocol currently not implemented') 565 # ignore POSTed data 566 # 567 # syntax of data URLs: 568 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 569 # mediatype := [ type "/" subtype ] *( ";" parameter ) 570 # data := *urlchar 571 # parameter := attribute "=" value 572 import mimetools 573 try: 574 from cStringIO import StringIO 575 except ImportError: 576 from StringIO import StringIO 577 try: 578 [type, data] = url.split(',', 1) 579 except ValueError: 580 raise IOError, ('data error', 'bad data URL') 581 if not type: 582 type = 'text/plain;charset=US-ASCII' 583 semi = type.rfind(';') 584 if semi >= 0 and '=' not in type[semi:]: 585 encoding = type[semi+1:] 586 type = type[:semi] 587 else: 588 encoding = '' 589 msg = [] 590 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 591 time.gmtime(time.time()))) 592 msg.append('Content-type: %s' % type) 593 if encoding == 'base64': 594 data = base64.decodestring(data) 595 else: 596 data = unquote(data) 597 msg.append('Content-Length: %d' % len(data)) 598 msg.append('') 599 msg.append(data) 600 msg = '\n'.join(msg) 601 f = StringIO(msg) 602 headers = mimetools.Message(f, 0) 603 #f.fileno = None # needed for addinfourl 604 return addinfourl(f, headers, url) 605 606 607class FancyURLopener(URLopener): 608 """Derived class with handlers for errors we can handle (perhaps).""" 609 610 def __init__(self, *args, **kwargs): 611 URLopener.__init__(self, *args, **kwargs) 612 self.auth_cache = {} 613 self.tries = 0 614 self.maxtries = 10 615 616 def http_error_default(self, url, fp, errcode, errmsg, headers): 617 """Default error handling -- don't raise an exception.""" 618 return addinfourl(fp, headers, "http:" + url, errcode) 619 620 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): 621 """Error 302 -- relocated (temporarily).""" 622 self.tries += 1 623 if self.maxtries and self.tries >= self.maxtries: 624 if hasattr(self, "http_error_500"): 625 meth = self.http_error_500 626 else: 627 meth = self.http_error_default 628 self.tries = 0 629 return meth(url, fp, 500, 630 "Internal Server Error: Redirect Recursion", headers) 631 result = self.redirect_internal(url, fp, errcode, errmsg, headers, 632 data) 633 self.tries = 0 634 return result 635 636 def redirect_internal(self, url, fp, errcode, errmsg, headers, data): 637 if 'location' in headers: 638 newurl = headers['location'] 639 elif 'uri' in headers: 640 newurl = headers['uri'] 641 else: 642 return 643 fp.close() 644 # In case the server sent a relative URL, join with original: 645 newurl = basejoin(self.type + ":" + url, newurl) 646 647 # For security reasons we do not allow redirects to protocols 648 # other than HTTP, HTTPS or FTP. 649 newurl_lower = newurl.lower() 650 if not (newurl_lower.startswith('http://') or 651 newurl_lower.startswith('https://') or 652 newurl_lower.startswith('ftp://')): 653 raise IOError('redirect error', errcode, 654 errmsg + " - Redirection to url '%s' is not allowed" % 655 newurl, 656 headers) 657 658 return self.open(newurl) 659 660 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 661 """Error 301 -- also relocated (permanently).""" 662 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 663 664 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): 665 """Error 303 -- also relocated (essentially identical to 302).""" 666 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 667 668 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): 669 """Error 307 -- relocated, but turn POST into error.""" 670 if data is None: 671 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 672 else: 673 return self.http_error_default(url, fp, errcode, errmsg, headers) 674 675 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): 676 """Error 401 -- authentication required. 677 This function supports Basic authentication only.""" 678 if not 'www-authenticate' in headers: 679 URLopener.http_error_default(self, url, fp, 680 errcode, errmsg, headers) 681 stuff = headers['www-authenticate'] 682 import re 683 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 684 if not match: 685 URLopener.http_error_default(self, url, fp, 686 errcode, errmsg, headers) 687 scheme, realm = match.groups() 688 if scheme.lower() != 'basic': 689 URLopener.http_error_default(self, url, fp, 690 errcode, errmsg, headers) 691 name = 'retry_' + self.type + '_basic_auth' 692 if data is None: 693 return getattr(self,name)(url, realm) 694 else: 695 return getattr(self,name)(url, realm, data) 696 697 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): 698 """Error 407 -- proxy authentication required. 699 This function supports Basic authentication only.""" 700 if not 'proxy-authenticate' in headers: 701 URLopener.http_error_default(self, url, fp, 702 errcode, errmsg, headers) 703 stuff = headers['proxy-authenticate'] 704 import re 705 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 706 if not match: 707 URLopener.http_error_default(self, url, fp, 708 errcode, errmsg, headers) 709 scheme, realm = match.groups() 710 if scheme.lower() != 'basic': 711 URLopener.http_error_default(self, url, fp, 712 errcode, errmsg, headers) 713 name = 'retry_proxy_' + self.type + '_basic_auth' 714 if data is None: 715 return getattr(self,name)(url, realm) 716 else: 717 return getattr(self,name)(url, realm, data) 718 719 def retry_proxy_http_basic_auth(self, url, realm, data=None): 720 host, selector = splithost(url) 721 newurl = 'http://' + host + selector 722 proxy = self.proxies['http'] 723 urltype, proxyhost = splittype(proxy) 724 proxyhost, proxyselector = splithost(proxyhost) 725 i = proxyhost.find('@') + 1 726 proxyhost = proxyhost[i:] 727 user, passwd = self.get_user_passwd(proxyhost, realm, i) 728 if not (user or passwd): return None 729 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost 730 self.proxies['http'] = 'http://' + proxyhost + proxyselector 731 if data is None: 732 return self.open(newurl) 733 else: 734 return self.open(newurl, data) 735 736 def retry_proxy_https_basic_auth(self, url, realm, data=None): 737 host, selector = splithost(url) 738 newurl = 'https://' + host + selector 739 proxy = self.proxies['https'] 740 urltype, proxyhost = splittype(proxy) 741 proxyhost, proxyselector = splithost(proxyhost) 742 i = proxyhost.find('@') + 1 743 proxyhost = proxyhost[i:] 744 user, passwd = self.get_user_passwd(proxyhost, realm, i) 745 if not (user or passwd): return None 746 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost 747 self.proxies['https'] = 'https://' + proxyhost + proxyselector 748 if data is None: 749 return self.open(newurl) 750 else: 751 return self.open(newurl, data) 752 753 def retry_http_basic_auth(self, url, realm, data=None): 754 host, selector = splithost(url) 755 i = host.find('@') + 1 756 host = host[i:] 757 user, passwd = self.get_user_passwd(host, realm, i) 758 if not (user or passwd): return None 759 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 760 newurl = 'http://' + host + selector 761 if data is None: 762 return self.open(newurl) 763 else: 764 return self.open(newurl, data) 765 766 def retry_https_basic_auth(self, url, realm, data=None): 767 host, selector = splithost(url) 768 i = host.find('@') + 1 769 host = host[i:] 770 user, passwd = self.get_user_passwd(host, realm, i) 771 if not (user or passwd): return None 772 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 773 newurl = 'https://' + host + selector 774 if data is None: 775 return self.open(newurl) 776 else: 777 return self.open(newurl, data) 778 779 def get_user_passwd(self, host, realm, clear_cache=0): 780 key = realm + '@' + host.lower() 781 if key in self.auth_cache: 782 if clear_cache: 783 del self.auth_cache[key] 784 else: 785 return self.auth_cache[key] 786 user, passwd = self.prompt_user_passwd(host, realm) 787 if user or passwd: self.auth_cache[key] = (user, passwd) 788 return user, passwd 789 790 def prompt_user_passwd(self, host, realm): 791 """Override this in a GUI environment!""" 792 import getpass 793 try: 794 user = raw_input("Enter username for %s at %s: " % (realm, 795 host)) 796 passwd = getpass.getpass("Enter password for %s in %s at %s: " % 797 (user, realm, host)) 798 return user, passwd 799 except KeyboardInterrupt: 800 print 801 return None, None 802 803 804# Utility functions 805 806_localhost = None 807def localhost(): 808 """Return the IP address of the magic hostname 'localhost'.""" 809 global _localhost 810 if _localhost is None: 811 _localhost = socket.gethostbyname('localhost') 812 return _localhost 813 814_thishost = None 815def thishost(): 816 """Return the IP address of the current host.""" 817 global _thishost 818 if _thishost is None: 819 _thishost = socket.gethostbyname(socket.gethostname()) 820 return _thishost 821 822_ftperrors = None 823def ftperrors(): 824 """Return the set of errors raised by the FTP class.""" 825 global _ftperrors 826 if _ftperrors is None: 827 import ftplib 828 _ftperrors = ftplib.all_errors 829 return _ftperrors 830 831_noheaders = None 832def noheaders(): 833 """Return an empty mimetools.Message object.""" 834 global _noheaders 835 if _noheaders is None: 836 import mimetools 837 try: 838 from cStringIO import StringIO 839 except ImportError: 840 from StringIO import StringIO 841 _noheaders = mimetools.Message(StringIO(), 0) 842 _noheaders.fp.close() # Recycle file descriptor 843 return _noheaders 844 845 846# Utility classes 847 848class ftpwrapper: 849 """Class used by open_ftp() for cache of open FTP connections.""" 850 851 def __init__(self, user, passwd, host, port, dirs, 852 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 853 persistent=True): 854 self.user = user 855 self.passwd = passwd 856 self.host = host 857 self.port = port 858 self.dirs = dirs 859 self.timeout = timeout 860 self.refcount = 0 861 self.keepalive = persistent 862 self.init() 863 864 def init(self): 865 import ftplib 866 self.busy = 0 867 self.ftp = ftplib.FTP() 868 self.ftp.connect(self.host, self.port, self.timeout) 869 self.ftp.login(self.user, self.passwd) 870 for dir in self.dirs: 871 self.ftp.cwd(dir) 872 873 def retrfile(self, file, type): 874 import ftplib 875 self.endtransfer() 876 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 877 else: cmd = 'TYPE ' + type; isdir = 0 878 try: 879 self.ftp.voidcmd(cmd) 880 except ftplib.all_errors: 881 self.init() 882 self.ftp.voidcmd(cmd) 883 conn = None 884 if file and not isdir: 885 # Try to retrieve as a file 886 try: 887 cmd = 'RETR ' + file 888 conn, retrlen = self.ftp.ntransfercmd(cmd) 889 except ftplib.error_perm, reason: 890 if str(reason)[:3] != '550': 891 raise IOError, ('ftp error', reason), sys.exc_info()[2] 892 if not conn: 893 # Set transfer mode to ASCII! 894 self.ftp.voidcmd('TYPE A') 895 # Try a directory listing. Verify that directory exists. 896 if file: 897 pwd = self.ftp.pwd() 898 try: 899 try: 900 self.ftp.cwd(file) 901 except ftplib.error_perm, reason: 902 raise IOError, ('ftp error', reason), sys.exc_info()[2] 903 finally: 904 self.ftp.cwd(pwd) 905 cmd = 'LIST ' + file 906 else: 907 cmd = 'LIST' 908 conn, retrlen = self.ftp.ntransfercmd(cmd) 909 self.busy = 1 910 ftpobj = addclosehook(conn.makefile('rb'), self.file_close) 911 self.refcount += 1 912 conn.close() 913 # Pass back both a suitably decorated object and a retrieval length 914 return (ftpobj, retrlen) 915 916 def endtransfer(self): 917 if not self.busy: 918 return 919 self.busy = 0 920 try: 921 self.ftp.voidresp() 922 except ftperrors(): 923 pass 924 925 def close(self): 926 self.keepalive = False 927 if self.refcount <= 0: 928 self.real_close() 929 930 def file_close(self): 931 self.endtransfer() 932 self.refcount -= 1 933 if self.refcount <= 0 and not self.keepalive: 934 self.real_close() 935 936 def real_close(self): 937 self.endtransfer() 938 try: 939 self.ftp.close() 940 except ftperrors(): 941 pass 942 943class addbase: 944 """Base class for addinfo and addclosehook.""" 945 946 def __init__(self, fp): 947 self.fp = fp 948 self.read = self.fp.read 949 self.readline = self.fp.readline 950 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines 951 if hasattr(self.fp, "fileno"): 952 self.fileno = self.fp.fileno 953 else: 954 self.fileno = lambda: None 955 if hasattr(self.fp, "__iter__"): 956 self.__iter__ = self.fp.__iter__ 957 if hasattr(self.fp, "next"): 958 self.next = self.fp.next 959 960 def __repr__(self): 961 return '<%s at %r whose fp = %r>' % (self.__class__.__name__, 962 id(self), self.fp) 963 964 def close(self): 965 self.read = None 966 self.readline = None 967 self.readlines = None 968 self.fileno = None 969 if self.fp: self.fp.close() 970 self.fp = None 971 972class addclosehook(addbase): 973 """Class to add a close hook to an open file.""" 974 975 def __init__(self, fp, closehook, *hookargs): 976 addbase.__init__(self, fp) 977 self.closehook = closehook 978 self.hookargs = hookargs 979 980 def close(self): 981 addbase.close(self) 982 if self.closehook: 983 self.closehook(*self.hookargs) 984 self.closehook = None 985 self.hookargs = None 986 987class addinfo(addbase): 988 """class to add an info() method to an open file.""" 989 990 def __init__(self, fp, headers): 991 addbase.__init__(self, fp) 992 self.headers = headers 993 994 def info(self): 995 return self.headers 996 997class addinfourl(addbase): 998 """class to add info() and geturl() methods to an open file.""" 999 1000 def __init__(self, fp, headers, url, code=None): 1001 addbase.__init__(self, fp) 1002 self.headers = headers 1003 self.url = url 1004 self.code = code 1005 1006 def info(self): 1007 return self.headers 1008 1009 def getcode(self): 1010 return self.code 1011 1012 def geturl(self): 1013 return self.url 1014 1015 1016# Utilities to parse URLs (most of these return None for missing parts): 1017# unwrap('<URL:type://host/path>') --> 'type://host/path' 1018# splittype('type:opaquestring') --> 'type', 'opaquestring' 1019# splithost('//host[:port]/path') --> 'host[:port]', '/path' 1020# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' 1021# splitpasswd('user:passwd') -> 'user', 'passwd' 1022# splitport('host:port') --> 'host', 'port' 1023# splitquery('/path?query') --> '/path', 'query' 1024# splittag('/path#tag') --> '/path', 'tag' 1025# splitattr('/path;attr1=value1;attr2=value2;...') -> 1026# '/path', ['attr1=value1', 'attr2=value2', ...] 1027# splitvalue('attr=value') --> 'attr', 'value' 1028# unquote('abc%20def') -> 'abc def' 1029# quote('abc def') -> 'abc%20def') 1030 1031try: 1032 unicode 1033except NameError: 1034 def _is_unicode(x): 1035 return 0 1036else: 1037 def _is_unicode(x): 1038 return isinstance(x, unicode) 1039 1040def toBytes(url): 1041 """toBytes(u"URL") --> 'URL'.""" 1042 # Most URL schemes require ASCII. If that changes, the conversion 1043 # can be relaxed 1044 if _is_unicode(url): 1045 try: 1046 url = url.encode("ASCII") 1047 except UnicodeError: 1048 raise UnicodeError("URL " + repr(url) + 1049 " contains non-ASCII characters") 1050 return url 1051 1052def unwrap(url): 1053 """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 1054 url = url.strip() 1055 if url[:1] == '<' and url[-1:] == '>': 1056 url = url[1:-1].strip() 1057 if url[:4] == 'URL:': url = url[4:].strip() 1058 return url 1059 1060_typeprog = None 1061def splittype(url): 1062 """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 1063 global _typeprog 1064 if _typeprog is None: 1065 import re 1066 _typeprog = re.compile('^([^/:]+):') 1067 1068 match = _typeprog.match(url) 1069 if match: 1070 scheme = match.group(1) 1071 return scheme.lower(), url[len(scheme) + 1:] 1072 return None, url 1073 1074_hostprog = None 1075def splithost(url): 1076 """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 1077 global _hostprog 1078 if _hostprog is None: 1079 import re 1080 _hostprog = re.compile('^//([^/?]*)(.*)$') 1081 1082 match = _hostprog.match(url) 1083 if match: 1084 host_port = match.group(1) 1085 path = match.group(2) 1086 if path and not path.startswith('/'): 1087 path = '/' + path 1088 return host_port, path 1089 return None, url 1090 1091_userprog = None 1092def splituser(host): 1093 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 1094 global _userprog 1095 if _userprog is None: 1096 import re 1097 _userprog = re.compile('^(.*)@(.*)$') 1098 1099 match = _userprog.match(host) 1100 if match: return match.group(1, 2) 1101 return None, host 1102 1103_passwdprog = None 1104def splitpasswd(user): 1105 """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 1106 global _passwdprog 1107 if _passwdprog is None: 1108 import re 1109 _passwdprog = re.compile('^([^:]*):(.*)$',re.S) 1110 1111 match = _passwdprog.match(user) 1112 if match: return match.group(1, 2) 1113 return user, None 1114 1115# splittag('/path#tag') --> '/path', 'tag' 1116_portprog = None 1117def splitport(host): 1118 """splitport('host:port') --> 'host', 'port'.""" 1119 global _portprog 1120 if _portprog is None: 1121 import re 1122 _portprog = re.compile('^(.*):([0-9]+)$') 1123 1124 match = _portprog.match(host) 1125 if match: return match.group(1, 2) 1126 return host, None 1127 1128_nportprog = None 1129def splitnport(host, defport=-1): 1130 """Split host and port, returning numeric port. 1131 Return given default port if no ':' found; defaults to -1. 1132 Return numerical port if a valid number are found after ':'. 1133 Return None if ':' but not a valid number.""" 1134 global _nportprog 1135 if _nportprog is None: 1136 import re 1137 _nportprog = re.compile('^(.*):(.*)$') 1138 1139 match = _nportprog.match(host) 1140 if match: 1141 host, port = match.group(1, 2) 1142 try: 1143 if not port: raise ValueError, "no digits" 1144 nport = int(port) 1145 except ValueError: 1146 nport = None 1147 return host, nport 1148 return host, defport 1149 1150_queryprog = None 1151def splitquery(url): 1152 """splitquery('/path?query') --> '/path', 'query'.""" 1153 global _queryprog 1154 if _queryprog is None: 1155 import re 1156 _queryprog = re.compile('^(.*)\?([^?]*)$') 1157 1158 match = _queryprog.match(url) 1159 if match: return match.group(1, 2) 1160 return url, None 1161 1162_tagprog = None 1163def splittag(url): 1164 """splittag('/path#tag') --> '/path', 'tag'.""" 1165 global _tagprog 1166 if _tagprog is None: 1167 import re 1168 _tagprog = re.compile('^(.*)#([^#]*)$') 1169 1170 match = _tagprog.match(url) 1171 if match: return match.group(1, 2) 1172 return url, None 1173 1174def splitattr(url): 1175 """splitattr('/path;attr1=value1;attr2=value2;...') -> 1176 '/path', ['attr1=value1', 'attr2=value2', ...].""" 1177 words = url.split(';') 1178 return words[0], words[1:] 1179 1180_valueprog = None 1181def splitvalue(attr): 1182 """splitvalue('attr=value') --> 'attr', 'value'.""" 1183 global _valueprog 1184 if _valueprog is None: 1185 import re 1186 _valueprog = re.compile('^([^=]*)=(.*)$') 1187 1188 match = _valueprog.match(attr) 1189 if match: return match.group(1, 2) 1190 return attr, None 1191 1192# urlparse contains a duplicate of this method to avoid a circular import. If 1193# you update this method, also update the copy in urlparse. This code 1194# duplication does not exist in Python3. 1195 1196_hexdig = '0123456789ABCDEFabcdef' 1197_hextochr = dict((a + b, chr(int(a + b, 16))) 1198 for a in _hexdig for b in _hexdig) 1199 1200def unquote(s): 1201 """unquote('abc%20def') -> 'abc def'.""" 1202 res = s.split('%') 1203 # fastpath 1204 if len(res) == 1: 1205 return s 1206 s = res[0] 1207 for item in res[1:]: 1208 try: 1209 s += _hextochr[item[:2]] + item[2:] 1210 except KeyError: 1211 s += '%' + item 1212 except UnicodeDecodeError: 1213 s += unichr(int(item[:2], 16)) + item[2:] 1214 return s 1215 1216def unquote_plus(s): 1217 """unquote('%7e/abc+def') -> '~/abc def'""" 1218 s = s.replace('+', ' ') 1219 return unquote(s) 1220 1221always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 1222 'abcdefghijklmnopqrstuvwxyz' 1223 '0123456789' '_.-') 1224_safe_map = {} 1225for i, c in zip(xrange(256), str(bytearray(xrange(256)))): 1226 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) 1227_safe_quoters = {} 1228 1229def quote(s, safe='/'): 1230 """quote('abc def') -> 'abc%20def' 1231 1232 Each part of a URL, e.g. the path info, the query, etc., has a 1233 different set of reserved characters that must be quoted. 1234 1235 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists 1236 the following reserved characters. 1237 1238 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 1239 "$" | "," 1240 1241 Each of these characters is reserved in some component of a URL, 1242 but not necessarily in all of them. 1243 1244 By default, the quote function is intended for quoting the path 1245 section of a URL. Thus, it will not encode '/'. This character 1246 is reserved, but in typical usage the quote function is being 1247 called on a path where the existing slash characters are used as 1248 reserved characters. 1249 """ 1250 # fastpath 1251 if not s: 1252 if s is None: 1253 raise TypeError('None object cannot be quoted') 1254 return s 1255 cachekey = (safe, always_safe) 1256 try: 1257 (quoter, safe) = _safe_quoters[cachekey] 1258 except KeyError: 1259 safe_map = _safe_map.copy() 1260 safe_map.update([(c, c) for c in safe]) 1261 quoter = safe_map.__getitem__ 1262 safe = always_safe + safe 1263 _safe_quoters[cachekey] = (quoter, safe) 1264 if not s.rstrip(safe): 1265 return s 1266 return ''.join(map(quoter, s)) 1267 1268def quote_plus(s, safe=''): 1269 """Quote the query fragment of a URL; replacing ' ' with '+'""" 1270 if ' ' in s: 1271 s = quote(s, safe + ' ') 1272 return s.replace(' ', '+') 1273 return quote(s, safe) 1274 1275def urlencode(query, doseq=0): 1276 """Encode a sequence of two-element tuples or dictionary into a URL query string. 1277 1278 If any values in the query arg are sequences and doseq is true, each 1279 sequence element is converted to a separate parameter. 1280 1281 If the query arg is a sequence of two-element tuples, the order of the 1282 parameters in the output will match the order of parameters in the 1283 input. 1284 """ 1285 1286 if hasattr(query,"items"): 1287 # mapping objects 1288 query = query.items() 1289 else: 1290 # it's a bother at times that strings and string-like objects are 1291 # sequences... 1292 try: 1293 # non-sequence items should not work with len() 1294 # non-empty strings will fail this 1295 if len(query) and not isinstance(query[0], tuple): 1296 raise TypeError 1297 # zero-length sequences of all types will get here and succeed, 1298 # but that's a minor nit - since the original implementation 1299 # allowed empty dicts that type of behavior probably should be 1300 # preserved for consistency 1301 except TypeError: 1302 ty,va,tb = sys.exc_info() 1303 raise TypeError, "not a valid non-string sequence or mapping object", tb 1304 1305 l = [] 1306 if not doseq: 1307 # preserve old behavior 1308 for k, v in query: 1309 k = quote_plus(str(k)) 1310 v = quote_plus(str(v)) 1311 l.append(k + '=' + v) 1312 else: 1313 for k, v in query: 1314 k = quote_plus(str(k)) 1315 if isinstance(v, str): 1316 v = quote_plus(v) 1317 l.append(k + '=' + v) 1318 elif _is_unicode(v): 1319 # is there a reasonable way to convert to ASCII? 1320 # encode generates a string, but "replace" or "ignore" 1321 # lose information and "strict" can raise UnicodeError 1322 v = quote_plus(v.encode("ASCII","replace")) 1323 l.append(k + '=' + v) 1324 else: 1325 try: 1326 # is this a sufficient test for sequence-ness? 1327 len(v) 1328 except TypeError: 1329 # not a sequence 1330 v = quote_plus(str(v)) 1331 l.append(k + '=' + v) 1332 else: 1333 # loop over the sequence 1334 for elt in v: 1335 l.append(k + '=' + quote_plus(str(elt))) 1336 return '&'.join(l) 1337 1338# Proxy handling 1339def getproxies_environment(): 1340 """Return a dictionary of scheme -> proxy server URL mappings. 1341 1342 Scan the environment for variables named <scheme>_proxy; 1343 this seems to be the standard convention. If you need a 1344 different way, you can pass a proxies dictionary to the 1345 [Fancy]URLopener constructor. 1346 1347 """ 1348 proxies = {} 1349 for name, value in os.environ.items(): 1350 name = name.lower() 1351 if value and name[-6:] == '_proxy': 1352 proxies[name[:-6]] = value 1353 return proxies 1354 1355def proxy_bypass_environment(host): 1356 """Test if proxies should not be used for a particular host. 1357 1358 Checks the environment for a variable named no_proxy, which should 1359 be a list of DNS suffixes separated by commas, or '*' for all hosts. 1360 """ 1361 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 1362 # '*' is special case for always bypass 1363 if no_proxy == '*': 1364 return 1 1365 # strip port off host 1366 hostonly, port = splitport(host) 1367 # check if the host ends with any of the DNS suffixes 1368 no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] 1369 for name in no_proxy_list: 1370 if name and (hostonly.endswith(name) or host.endswith(name)): 1371 return 1 1372 # otherwise, don't bypass 1373 return 0 1374 1375 1376if sys.platform == 'darwin': 1377 from _scproxy import _get_proxy_settings, _get_proxies 1378 1379 def proxy_bypass_macosx_sysconf(host): 1380 """ 1381 Return True iff this host shouldn't be accessed using a proxy 1382 1383 This function uses the MacOSX framework SystemConfiguration 1384 to fetch the proxy information. 1385 """ 1386 import re 1387 import socket 1388 from fnmatch import fnmatch 1389 1390 hostonly, port = splitport(host) 1391 1392 def ip2num(ipAddr): 1393 parts = ipAddr.split('.') 1394 parts = map(int, parts) 1395 if len(parts) != 4: 1396 parts = (parts + [0, 0, 0, 0])[:4] 1397 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 1398 1399 proxy_settings = _get_proxy_settings() 1400 1401 # Check for simple host names: 1402 if '.' not in host: 1403 if proxy_settings['exclude_simple']: 1404 return True 1405 1406 hostIP = None 1407 1408 for value in proxy_settings.get('exceptions', ()): 1409 # Items in the list are strings like these: *.local, 169.254/16 1410 if not value: continue 1411 1412 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 1413 if m is not None: 1414 if hostIP is None: 1415 try: 1416 hostIP = socket.gethostbyname(hostonly) 1417 hostIP = ip2num(hostIP) 1418 except socket.error: 1419 continue 1420 1421 base = ip2num(m.group(1)) 1422 mask = m.group(2) 1423 if mask is None: 1424 mask = 8 * (m.group(1).count('.') + 1) 1425 1426 else: 1427 mask = int(mask[1:]) 1428 mask = 32 - mask 1429 1430 if (hostIP >> mask) == (base >> mask): 1431 return True 1432 1433 elif fnmatch(host, value): 1434 return True 1435 1436 return False 1437 1438 def getproxies_macosx_sysconf(): 1439 """Return a dictionary of scheme -> proxy server URL mappings. 1440 1441 This function uses the MacOSX framework SystemConfiguration 1442 to fetch the proxy information. 1443 """ 1444 return _get_proxies() 1445 1446 def proxy_bypass(host): 1447 if getproxies_environment(): 1448 return proxy_bypass_environment(host) 1449 else: 1450 return proxy_bypass_macosx_sysconf(host) 1451 1452 def getproxies(): 1453 return getproxies_environment() or getproxies_macosx_sysconf() 1454 1455elif os.name == 'nt': 1456 def getproxies_registry(): 1457 """Return a dictionary of scheme -> proxy server URL mappings. 1458 1459 Win32 uses the registry to store proxies. 1460 1461 """ 1462 proxies = {} 1463 try: 1464 import _winreg 1465 except ImportError: 1466 # Std module, so should be around - but you never know! 1467 return proxies 1468 try: 1469 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1470 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1471 proxyEnable = _winreg.QueryValueEx(internetSettings, 1472 'ProxyEnable')[0] 1473 if proxyEnable: 1474 # Returned as Unicode but problems if not converted to ASCII 1475 proxyServer = str(_winreg.QueryValueEx(internetSettings, 1476 'ProxyServer')[0]) 1477 if '=' in proxyServer: 1478 # Per-protocol settings 1479 for p in proxyServer.split(';'): 1480 protocol, address = p.split('=', 1) 1481 # See if address has a type:// prefix 1482 import re 1483 if not re.match('^([^/:]+)://', address): 1484 address = '%s://%s' % (protocol, address) 1485 proxies[protocol] = address 1486 else: 1487 # Use one setting for all protocols 1488 if proxyServer[:5] == 'http:': 1489 proxies['http'] = proxyServer 1490 else: 1491 proxies['http'] = 'http://%s' % proxyServer 1492 proxies['https'] = 'https://%s' % proxyServer 1493 proxies['ftp'] = 'ftp://%s' % proxyServer 1494 internetSettings.Close() 1495 except (WindowsError, ValueError, TypeError): 1496 # Either registry key not found etc, or the value in an 1497 # unexpected format. 1498 # proxies already set up to be empty so nothing to do 1499 pass 1500 return proxies 1501 1502 def getproxies(): 1503 """Return a dictionary of scheme -> proxy server URL mappings. 1504 1505 Returns settings gathered from the environment, if specified, 1506 or the registry. 1507 1508 """ 1509 return getproxies_environment() or getproxies_registry() 1510 1511 def proxy_bypass_registry(host): 1512 try: 1513 import _winreg 1514 import re 1515 except ImportError: 1516 # Std modules, so should be around - but you never know! 1517 return 0 1518 try: 1519 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1520 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1521 proxyEnable = _winreg.QueryValueEx(internetSettings, 1522 'ProxyEnable')[0] 1523 proxyOverride = str(_winreg.QueryValueEx(internetSettings, 1524 'ProxyOverride')[0]) 1525 # ^^^^ Returned as Unicode but problems if not converted to ASCII 1526 except WindowsError: 1527 return 0 1528 if not proxyEnable or not proxyOverride: 1529 return 0 1530 # try to make a host list from name and IP address. 1531 rawHost, port = splitport(host) 1532 host = [rawHost] 1533 try: 1534 addr = socket.gethostbyname(rawHost) 1535 if addr != rawHost: 1536 host.append(addr) 1537 except socket.error: 1538 pass 1539 try: 1540 fqdn = socket.getfqdn(rawHost) 1541 if fqdn != rawHost: 1542 host.append(fqdn) 1543 except socket.error: 1544 pass 1545 # make a check value list from the registry entry: replace the 1546 # '<local>' string by the localhost entry and the corresponding 1547 # canonical entry. 1548 proxyOverride = proxyOverride.split(';') 1549 # now check if we match one of the registry values. 1550 for test in proxyOverride: 1551 if test == '<local>': 1552 if '.' not in rawHost: 1553 return 1 1554 test = test.replace(".", r"\.") # mask dots 1555 test = test.replace("*", r".*") # change glob sequence 1556 test = test.replace("?", r".") # change glob char 1557 for val in host: 1558 # print "%s <--> %s" %( test, val ) 1559 if re.match(test, val, re.I): 1560 return 1 1561 return 0 1562 1563 def proxy_bypass(host): 1564 """Return a dictionary of scheme -> proxy server URL mappings. 1565 1566 Returns settings gathered from the environment, if specified, 1567 or the registry. 1568 1569 """ 1570 if getproxies_environment(): 1571 return proxy_bypass_environment(host) 1572 else: 1573 return proxy_bypass_registry(host) 1574 1575else: 1576 # By default use environment variables 1577 getproxies = getproxies_environment 1578 proxy_bypass = proxy_bypass_environment 1579 1580# Test and time quote() and unquote() 1581def test1(): 1582 s = '' 1583 for i in range(256): s = s + chr(i) 1584 s = s*4 1585 t0 = time.time() 1586 qs = quote(s) 1587 uqs = unquote(qs) 1588 t1 = time.time() 1589 if uqs != s: 1590 print 'Wrong!' 1591 print repr(s) 1592 print repr(qs) 1593 print repr(uqs) 1594 print round(t1 - t0, 3), 'sec' 1595 1596 1597def reporthook(blocknum, blocksize, totalsize): 1598 # Report during remote transfers 1599 print "Block number: %d, Block size: %d, Total size: %d" % ( 1600 blocknum, blocksize, totalsize) 1601