urllib.py revision 4c59211bd5b136880bb3b5c6aef033e2b62c1019
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that is has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""

import string
import socket
import os
import time
import sys
import base64

from urlparse import urljoin as basejoin

__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from."""
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        # Explicit proxies get a fresh, uncached opener.
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)

def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* to a local file, reusing the module-level opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)

def urlcleanup():
    """Discard temporary files and cached state held by this module."""
    if _urlopener:
        _urlopener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()

# check for SSL
try:
    import ssl
except ImportError:
    # Narrowed from a bare "except:": only a missing module should
    # disable HTTPS support; any other failure ought to propagate.
    _have_ssl = False
else:
    _have_ssl = True

# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised by retrieve() when less data arrives than Content-Length promised."""
    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content  # the (truncated) data read before the short count

ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""
119 Note -- this is a base class for those who don't want the 120 automatic handling of errors type 302 (relocated) and 401 121 (authorization needed).""" 122 123 __tempfiles = None 124 125 version = "Python-urllib/%s" % __version__ 126 127 # Constructor 128 def __init__(self, proxies=None, **x509): 129 if proxies is None: 130 proxies = getproxies() 131 assert hasattr(proxies, 'has_key'), "proxies must be a mapping" 132 self.proxies = proxies 133 self.key_file = x509.get('key_file') 134 self.cert_file = x509.get('cert_file') 135 self.addheaders = [('User-Agent', self.version)] 136 self.__tempfiles = [] 137 self.__unlink = os.unlink # See cleanup() 138 self.tempcache = None 139 # Undocumented feature: if you assign {} to tempcache, 140 # it is used to cache files retrieved with 141 # self.retrieve(). This is not enabled by default 142 # since it does not work for changing documents (and I 143 # haven't got the logic to check expiration headers 144 # yet). 145 self.ftpcache = ftpcache 146 # Undocumented feature: you can use a different 147 # ftp cache by assigning to the .ftpcache member; 148 # in case you want logically independent URL openers 149 # XXX This is not threadsafe. Bah. 150 151 def __del__(self): 152 self.close() 153 154 def close(self): 155 self.cleanup() 156 157 def cleanup(self): 158 # This code sometimes runs when the rest of this module 159 # has already been deleted, so it can't use any globals 160 # or import anything. 161 if self.__tempfiles: 162 for file in self.__tempfiles: 163 try: 164 self.__unlink(file) 165 except OSError: 166 pass 167 del self.__tempfiles[:] 168 if self.tempcache: 169 self.tempcache.clear() 170 171 def addheader(self, *args): 172 """Add a header to be used by the HTTP interface only 173 e.g. 
u.addheader('Accept', 'sound/basic')""" 174 self.addheaders.append(args) 175 176 # External interface 177 def open(self, fullurl, data=None): 178 """Use URLopener().open(file) instead of open(file, 'r').""" 179 fullurl = unwrap(toBytes(fullurl)) 180 # percent encode url, fixing lame server errors for e.g, like space 181 # within url paths. 182 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") 183 if self.tempcache and fullurl in self.tempcache: 184 filename, headers = self.tempcache[fullurl] 185 fp = open(filename, 'rb') 186 return addinfourl(fp, headers, fullurl) 187 urltype, url = splittype(fullurl) 188 if not urltype: 189 urltype = 'file' 190 if urltype in self.proxies: 191 proxy = self.proxies[urltype] 192 urltype, proxyhost = splittype(proxy) 193 host, selector = splithost(proxyhost) 194 url = (host, fullurl) # Signal special case to open_*() 195 else: 196 proxy = None 197 name = 'open_' + urltype 198 self.type = urltype 199 name = name.replace('-', '_') 200 if not hasattr(self, name): 201 if proxy: 202 return self.open_unknown_proxy(proxy, fullurl, data) 203 else: 204 return self.open_unknown(fullurl, data) 205 try: 206 if data is None: 207 return getattr(self, name)(url) 208 else: 209 return getattr(self, name)(url, data) 210 except socket.error, msg: 211 raise IOError, ('socket error', msg), sys.exc_info()[2] 212 213 def open_unknown(self, fullurl, data=None): 214 """Overridable interface to open unknown URL type.""" 215 type, url = splittype(fullurl) 216 raise IOError, ('url error', 'unknown url type', type) 217 218 def open_unknown_proxy(self, proxy, fullurl, data=None): 219 """Overridable interface to open unknown URL type.""" 220 type, url = splittype(fullurl) 221 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) 222 223 # External interface 224 def retrieve(self, url, filename=None, reporthook=None, data=None): 225 """retrieve(url) returns (filename, headers) for a local object 226 or (tempfilename, headers) for a remote 
object.""" 227 url = unwrap(toBytes(url)) 228 if self.tempcache and url in self.tempcache: 229 return self.tempcache[url] 230 type, url1 = splittype(url) 231 if filename is None and (not type or type == 'file'): 232 try: 233 fp = self.open_local_file(url1) 234 hdrs = fp.info() 235 fp.close() 236 return url2pathname(splithost(url1)[1]), hdrs 237 except IOError: 238 pass 239 fp = self.open(url, data) 240 try: 241 headers = fp.info() 242 if filename: 243 tfp = open(filename, 'wb') 244 else: 245 import tempfile 246 garbage, path = splittype(url) 247 garbage, path = splithost(path or "") 248 path, garbage = splitquery(path or "") 249 path, garbage = splitattr(path or "") 250 suffix = os.path.splitext(path)[1] 251 (fd, filename) = tempfile.mkstemp(suffix) 252 self.__tempfiles.append(filename) 253 tfp = os.fdopen(fd, 'wb') 254 try: 255 result = filename, headers 256 if self.tempcache is not None: 257 self.tempcache[url] = result 258 bs = 1024*8 259 size = -1 260 read = 0 261 blocknum = 0 262 if "content-length" in headers: 263 size = int(headers["Content-Length"]) 264 if reporthook: 265 reporthook(blocknum, bs, size) 266 while 1: 267 block = fp.read(bs) 268 if block == "": 269 break 270 read += len(block) 271 tfp.write(block) 272 blocknum += 1 273 if reporthook: 274 reporthook(blocknum, bs, size) 275 finally: 276 tfp.close() 277 finally: 278 fp.close() 279 280 # raise exception if actual size does not match content-length header 281 if size >= 0 and read < size: 282 raise ContentTooShortError("retrieval incomplete: got only %i out " 283 "of %i bytes" % (read, size), result) 284 285 return result 286 287 # Each method named open_<type> knows how to open that type of URL 288 289 def open_http(self, url, data=None): 290 """Use HTTP protocol.""" 291 import httplib 292 user_passwd = None 293 proxy_passwd= None 294 if isinstance(url, str): 295 host, selector = splithost(url) 296 if host: 297 user_passwd, host = splituser(host) 298 host = unquote(host) 299 realhost = host 300 
else: 301 host, selector = url 302 # check whether the proxy contains authorization information 303 proxy_passwd, host = splituser(host) 304 # now we proceed with the url we want to obtain 305 urltype, rest = splittype(selector) 306 url = rest 307 user_passwd = None 308 if urltype.lower() != 'http': 309 realhost = None 310 else: 311 realhost, rest = splithost(rest) 312 if realhost: 313 user_passwd, realhost = splituser(realhost) 314 if user_passwd: 315 selector = "%s://%s%s" % (urltype, realhost, rest) 316 if proxy_bypass(realhost): 317 host = realhost 318 319 #print "proxy via http:", host, selector 320 if not host: raise IOError, ('http error', 'no host given') 321 322 if proxy_passwd: 323 proxy_passwd = unquote(proxy_passwd) 324 proxy_auth = base64.b64encode(proxy_passwd).strip() 325 else: 326 proxy_auth = None 327 328 if user_passwd: 329 user_passwd = unquote(user_passwd) 330 auth = base64.b64encode(user_passwd).strip() 331 else: 332 auth = None 333 h = httplib.HTTP(host) 334 if data is not None: 335 h.putrequest('POST', selector) 336 h.putheader('Content-Type', 'application/x-www-form-urlencoded') 337 h.putheader('Content-Length', '%d' % len(data)) 338 else: 339 h.putrequest('GET', selector) 340 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 341 if auth: h.putheader('Authorization', 'Basic %s' % auth) 342 if realhost: h.putheader('Host', realhost) 343 for args in self.addheaders: h.putheader(*args) 344 h.endheaders(data) 345 errcode, errmsg, headers = h.getreply() 346 fp = h.getfile() 347 if errcode == -1: 348 if fp: fp.close() 349 # something went wrong with the HTTP status line 350 raise IOError, ('http protocol error', 0, 351 'got a bad status line', None) 352 # According to RFC 2616, "2xx" code indicates that the client's 353 # request was successfully received, understood, and accepted. 
354 if (200 <= errcode < 300): 355 return addinfourl(fp, headers, "http:" + url, errcode) 356 else: 357 if data is None: 358 return self.http_error(url, fp, errcode, errmsg, headers) 359 else: 360 return self.http_error(url, fp, errcode, errmsg, headers, data) 361 362 def http_error(self, url, fp, errcode, errmsg, headers, data=None): 363 """Handle http errors. 364 Derived class can override this, or provide specific handlers 365 named http_error_DDD where DDD is the 3-digit error code.""" 366 # First check if there's a specific handler for this error 367 name = 'http_error_%d' % errcode 368 if hasattr(self, name): 369 method = getattr(self, name) 370 if data is None: 371 result = method(url, fp, errcode, errmsg, headers) 372 else: 373 result = method(url, fp, errcode, errmsg, headers, data) 374 if result: return result 375 return self.http_error_default(url, fp, errcode, errmsg, headers) 376 377 def http_error_default(self, url, fp, errcode, errmsg, headers): 378 """Default error handler: close the connection and raise IOError.""" 379 fp.close() 380 raise IOError, ('http error', errcode, errmsg, headers) 381 382 if _have_ssl: 383 def open_https(self, url, data=None): 384 """Use HTTPS protocol.""" 385 386 import httplib 387 user_passwd = None 388 proxy_passwd = None 389 if isinstance(url, str): 390 host, selector = splithost(url) 391 if host: 392 user_passwd, host = splituser(host) 393 host = unquote(host) 394 realhost = host 395 else: 396 host, selector = url 397 # here, we determine, whether the proxy contains authorization information 398 proxy_passwd, host = splituser(host) 399 urltype, rest = splittype(selector) 400 url = rest 401 user_passwd = None 402 if urltype.lower() != 'https': 403 realhost = None 404 else: 405 realhost, rest = splithost(rest) 406 if realhost: 407 user_passwd, realhost = splituser(realhost) 408 if user_passwd: 409 selector = "%s://%s%s" % (urltype, realhost, rest) 410 #print "proxy via https:", host, selector 411 if not host: raise 
IOError, ('https error', 'no host given') 412 if proxy_passwd: 413 proxy_passwd = unquote(proxy_passwd) 414 proxy_auth = base64.b64encode(proxy_passwd).strip() 415 else: 416 proxy_auth = None 417 if user_passwd: 418 user_passwd = unquote(user_passwd) 419 auth = base64.b64encode(user_passwd).strip() 420 else: 421 auth = None 422 h = httplib.HTTPS(host, 0, 423 key_file=self.key_file, 424 cert_file=self.cert_file) 425 if data is not None: 426 h.putrequest('POST', selector) 427 h.putheader('Content-Type', 428 'application/x-www-form-urlencoded') 429 h.putheader('Content-Length', '%d' % len(data)) 430 else: 431 h.putrequest('GET', selector) 432 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) 433 if auth: h.putheader('Authorization', 'Basic %s' % auth) 434 if realhost: h.putheader('Host', realhost) 435 for args in self.addheaders: h.putheader(*args) 436 h.endheaders(data) 437 errcode, errmsg, headers = h.getreply() 438 fp = h.getfile() 439 if errcode == -1: 440 if fp: fp.close() 441 # something went wrong with the HTTP status line 442 raise IOError, ('http protocol error', 0, 443 'got a bad status line', None) 444 # According to RFC 2616, "2xx" code indicates that the client's 445 # request was successfully received, understood, and accepted. 
446 if (200 <= errcode < 300): 447 return addinfourl(fp, headers, "https:" + url, errcode) 448 else: 449 if data is None: 450 return self.http_error(url, fp, errcode, errmsg, headers) 451 else: 452 return self.http_error(url, fp, errcode, errmsg, headers, 453 data) 454 455 def open_file(self, url): 456 """Use local file or FTP depending on form of URL.""" 457 if not isinstance(url, str): 458 raise IOError, ('file error', 'proxy support for file protocol currently not implemented') 459 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 460 return self.open_ftp(url) 461 else: 462 return self.open_local_file(url) 463 464 def open_local_file(self, url): 465 """Use local file.""" 466 import mimetypes, mimetools, email.utils 467 try: 468 from cStringIO import StringIO 469 except ImportError: 470 from StringIO import StringIO 471 host, file = splithost(url) 472 localname = url2pathname(file) 473 try: 474 stats = os.stat(localname) 475 except OSError, e: 476 raise IOError(e.errno, e.strerror, e.filename) 477 size = stats.st_size 478 modified = email.utils.formatdate(stats.st_mtime, usegmt=True) 479 mtype = mimetypes.guess_type(url)[0] 480 headers = mimetools.Message(StringIO( 481 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 482 (mtype or 'text/plain', size, modified))) 483 if not host: 484 urlfile = file 485 if file[:1] == '/': 486 urlfile = 'file://' + file 487 elif file[:2] == './': 488 raise ValueError("local file url may start with / or file:. 
Unknown url of type: %s" % url) 489 return addinfourl(open(localname, 'rb'), 490 headers, urlfile) 491 host, port = splitport(host) 492 if not port \ 493 and socket.gethostbyname(host) in (localhost(), thishost()): 494 urlfile = file 495 if file[:1] == '/': 496 urlfile = 'file://' + file 497 return addinfourl(open(localname, 'rb'), 498 headers, urlfile) 499 raise IOError, ('local file error', 'not on local host') 500 501 def open_ftp(self, url): 502 """Use FTP protocol.""" 503 if not isinstance(url, str): 504 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') 505 import mimetypes, mimetools 506 try: 507 from cStringIO import StringIO 508 except ImportError: 509 from StringIO import StringIO 510 host, path = splithost(url) 511 if not host: raise IOError, ('ftp error', 'no host given') 512 host, port = splitport(host) 513 user, host = splituser(host) 514 if user: user, passwd = splitpasswd(user) 515 else: passwd = None 516 host = unquote(host) 517 user = user or '' 518 passwd = passwd or '' 519 host = socket.gethostbyname(host) 520 if not port: 521 import ftplib 522 port = ftplib.FTP_PORT 523 else: 524 port = int(port) 525 path, attrs = splitattr(path) 526 path = unquote(path) 527 dirs = path.split('/') 528 dirs, file = dirs[:-1], dirs[-1] 529 if dirs and not dirs[0]: dirs = dirs[1:] 530 if dirs and not dirs[0]: dirs[0] = '/' 531 key = user, host, port, '/'.join(dirs) 532 # XXX thread unsafe! 
533 if len(self.ftpcache) > MAXFTPCACHE: 534 # Prune the cache, rather arbitrarily 535 for k in self.ftpcache.keys(): 536 if k != key: 537 v = self.ftpcache[k] 538 del self.ftpcache[k] 539 v.close() 540 try: 541 if not key in self.ftpcache: 542 self.ftpcache[key] = \ 543 ftpwrapper(user, passwd, host, port, dirs) 544 if not file: type = 'D' 545 else: type = 'I' 546 for attr in attrs: 547 attr, value = splitvalue(attr) 548 if attr.lower() == 'type' and \ 549 value in ('a', 'A', 'i', 'I', 'd', 'D'): 550 type = value.upper() 551 (fp, retrlen) = self.ftpcache[key].retrfile(file, type) 552 mtype = mimetypes.guess_type("ftp:" + url)[0] 553 headers = "" 554 if mtype: 555 headers += "Content-Type: %s\n" % mtype 556 if retrlen is not None and retrlen >= 0: 557 headers += "Content-Length: %d\n" % retrlen 558 headers = mimetools.Message(StringIO(headers)) 559 return addinfourl(fp, headers, "ftp:" + url) 560 except ftperrors(), msg: 561 raise IOError, ('ftp error', msg), sys.exc_info()[2] 562 563 def open_data(self, url, data=None): 564 """Use "data" URL.""" 565 if not isinstance(url, str): 566 raise IOError, ('data error', 'proxy support for data protocol currently not implemented') 567 # ignore POSTed data 568 # 569 # syntax of data URLs: 570 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 571 # mediatype := [ type "/" subtype ] *( ";" parameter ) 572 # data := *urlchar 573 # parameter := attribute "=" value 574 import mimetools 575 try: 576 from cStringIO import StringIO 577 except ImportError: 578 from StringIO import StringIO 579 try: 580 [type, data] = url.split(',', 1) 581 except ValueError: 582 raise IOError, ('data error', 'bad data URL') 583 if not type: 584 type = 'text/plain;charset=US-ASCII' 585 semi = type.rfind(';') 586 if semi >= 0 and '=' not in type[semi:]: 587 encoding = type[semi+1:] 588 type = type[:semi] 589 else: 590 encoding = '' 591 msg = [] 592 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 593 time.gmtime(time.time()))) 
594 msg.append('Content-type: %s' % type) 595 if encoding == 'base64': 596 data = base64.decodestring(data) 597 else: 598 data = unquote(data) 599 msg.append('Content-Length: %d' % len(data)) 600 msg.append('') 601 msg.append(data) 602 msg = '\n'.join(msg) 603 f = StringIO(msg) 604 headers = mimetools.Message(f, 0) 605 #f.fileno = None # needed for addinfourl 606 return addinfourl(f, headers, url) 607 608 609class FancyURLopener(URLopener): 610 """Derived class with handlers for errors we can handle (perhaps).""" 611 612 def __init__(self, *args, **kwargs): 613 URLopener.__init__(self, *args, **kwargs) 614 self.auth_cache = {} 615 self.tries = 0 616 self.maxtries = 10 617 618 def http_error_default(self, url, fp, errcode, errmsg, headers): 619 """Default error handling -- don't raise an exception.""" 620 return addinfourl(fp, headers, "http:" + url, errcode) 621 622 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): 623 """Error 302 -- relocated (temporarily).""" 624 self.tries += 1 625 if self.maxtries and self.tries >= self.maxtries: 626 if hasattr(self, "http_error_500"): 627 meth = self.http_error_500 628 else: 629 meth = self.http_error_default 630 self.tries = 0 631 return meth(url, fp, 500, 632 "Internal Server Error: Redirect Recursion", headers) 633 result = self.redirect_internal(url, fp, errcode, errmsg, headers, 634 data) 635 self.tries = 0 636 return result 637 638 def redirect_internal(self, url, fp, errcode, errmsg, headers, data): 639 if 'location' in headers: 640 newurl = headers['location'] 641 elif 'uri' in headers: 642 newurl = headers['uri'] 643 else: 644 return 645 fp.close() 646 # In case the server sent a relative URL, join with original: 647 newurl = basejoin(self.type + ":" + url, newurl) 648 649 # For security reasons we do not allow redirects to protocols 650 # other than HTTP, HTTPS or FTP. 
651 newurl_lower = newurl.lower() 652 if not (newurl_lower.startswith('http://') or 653 newurl_lower.startswith('https://') or 654 newurl_lower.startswith('ftp://')): 655 raise IOError('redirect error', errcode, 656 errmsg + " - Redirection to url '%s' is not allowed" % 657 newurl, 658 headers) 659 660 return self.open(newurl) 661 662 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 663 """Error 301 -- also relocated (permanently).""" 664 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 665 666 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): 667 """Error 303 -- also relocated (essentially identical to 302).""" 668 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 669 670 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): 671 """Error 307 -- relocated, but turn POST into error.""" 672 if data is None: 673 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 674 else: 675 return self.http_error_default(url, fp, errcode, errmsg, headers) 676 677 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): 678 """Error 401 -- authentication required. 
679 This function supports Basic authentication only.""" 680 if not 'www-authenticate' in headers: 681 URLopener.http_error_default(self, url, fp, 682 errcode, errmsg, headers) 683 stuff = headers['www-authenticate'] 684 import re 685 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 686 if not match: 687 URLopener.http_error_default(self, url, fp, 688 errcode, errmsg, headers) 689 scheme, realm = match.groups() 690 if scheme.lower() != 'basic': 691 URLopener.http_error_default(self, url, fp, 692 errcode, errmsg, headers) 693 name = 'retry_' + self.type + '_basic_auth' 694 if data is None: 695 return getattr(self,name)(url, realm) 696 else: 697 return getattr(self,name)(url, realm, data) 698 699 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): 700 """Error 407 -- proxy authentication required. 701 This function supports Basic authentication only.""" 702 if not 'proxy-authenticate' in headers: 703 URLopener.http_error_default(self, url, fp, 704 errcode, errmsg, headers) 705 stuff = headers['proxy-authenticate'] 706 import re 707 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 708 if not match: 709 URLopener.http_error_default(self, url, fp, 710 errcode, errmsg, headers) 711 scheme, realm = match.groups() 712 if scheme.lower() != 'basic': 713 URLopener.http_error_default(self, url, fp, 714 errcode, errmsg, headers) 715 name = 'retry_proxy_' + self.type + '_basic_auth' 716 if data is None: 717 return getattr(self,name)(url, realm) 718 else: 719 return getattr(self,name)(url, realm, data) 720 721 def retry_proxy_http_basic_auth(self, url, realm, data=None): 722 host, selector = splithost(url) 723 newurl = 'http://' + host + selector 724 proxy = self.proxies['http'] 725 urltype, proxyhost = splittype(proxy) 726 proxyhost, proxyselector = splithost(proxyhost) 727 i = proxyhost.find('@') + 1 728 proxyhost = proxyhost[i:] 729 user, passwd = self.get_user_passwd(proxyhost, realm, i) 730 if not (user or passwd): return 
None 731 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost 732 self.proxies['http'] = 'http://' + proxyhost + proxyselector 733 if data is None: 734 return self.open(newurl) 735 else: 736 return self.open(newurl, data) 737 738 def retry_proxy_https_basic_auth(self, url, realm, data=None): 739 host, selector = splithost(url) 740 newurl = 'https://' + host + selector 741 proxy = self.proxies['https'] 742 urltype, proxyhost = splittype(proxy) 743 proxyhost, proxyselector = splithost(proxyhost) 744 i = proxyhost.find('@') + 1 745 proxyhost = proxyhost[i:] 746 user, passwd = self.get_user_passwd(proxyhost, realm, i) 747 if not (user or passwd): return None 748 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost 749 self.proxies['https'] = 'https://' + proxyhost + proxyselector 750 if data is None: 751 return self.open(newurl) 752 else: 753 return self.open(newurl, data) 754 755 def retry_http_basic_auth(self, url, realm, data=None): 756 host, selector = splithost(url) 757 i = host.find('@') + 1 758 host = host[i:] 759 user, passwd = self.get_user_passwd(host, realm, i) 760 if not (user or passwd): return None 761 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 762 newurl = 'http://' + host + selector 763 if data is None: 764 return self.open(newurl) 765 else: 766 return self.open(newurl, data) 767 768 def retry_https_basic_auth(self, url, realm, data=None): 769 host, selector = splithost(url) 770 i = host.find('@') + 1 771 host = host[i:] 772 user, passwd = self.get_user_passwd(host, realm, i) 773 if not (user or passwd): return None 774 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 775 newurl = 'https://' + host + selector 776 if data is None: 777 return self.open(newurl) 778 else: 779 return self.open(newurl, data) 780 781 def get_user_passwd(self, host, realm, clear_cache=0): 782 key = realm + '@' + host.lower() 783 if key in self.auth_cache: 784 if 
clear_cache: 785 del self.auth_cache[key] 786 else: 787 return self.auth_cache[key] 788 user, passwd = self.prompt_user_passwd(host, realm) 789 if user or passwd: self.auth_cache[key] = (user, passwd) 790 return user, passwd 791 792 def prompt_user_passwd(self, host, realm): 793 """Override this in a GUI environment!""" 794 import getpass 795 try: 796 user = raw_input("Enter username for %s at %s: " % (realm, 797 host)) 798 passwd = getpass.getpass("Enter password for %s in %s at %s: " % 799 (user, realm, host)) 800 return user, passwd 801 except KeyboardInterrupt: 802 print 803 return None, None 804 805 806# Utility functions 807 808_localhost = None 809def localhost(): 810 """Return the IP address of the magic hostname 'localhost'.""" 811 global _localhost 812 if _localhost is None: 813 _localhost = socket.gethostbyname('localhost') 814 return _localhost 815 816_thishost = None 817def thishost(): 818 """Return the IP address of the current host.""" 819 global _thishost 820 if _thishost is None: 821 _thishost = socket.gethostbyname(socket.gethostname()) 822 return _thishost 823 824_ftperrors = None 825def ftperrors(): 826 """Return the set of errors raised by the FTP class.""" 827 global _ftperrors 828 if _ftperrors is None: 829 import ftplib 830 _ftperrors = ftplib.all_errors 831 return _ftperrors 832 833_noheaders = None 834def noheaders(): 835 """Return an empty mimetools.Message object.""" 836 global _noheaders 837 if _noheaders is None: 838 import mimetools 839 try: 840 from cStringIO import StringIO 841 except ImportError: 842 from StringIO import StringIO 843 _noheaders = mimetools.Message(StringIO(), 0) 844 _noheaders.fp.close() # Recycle file descriptor 845 return _noheaders 846 847 848# Utility classes 849 850class ftpwrapper: 851 """Class used by open_ftp() for cache of open FTP connections.""" 852 853 def __init__(self, user, passwd, host, port, dirs, 854 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 855 persistent=True): 856 self.user = user 857 self.passwd 
= passwd 858 self.host = host 859 self.port = port 860 self.dirs = dirs 861 self.timeout = timeout 862 self.refcount = 0 863 self.keepalive = persistent 864 self.init() 865 866 def init(self): 867 import ftplib 868 self.busy = 0 869 self.ftp = ftplib.FTP() 870 self.ftp.connect(self.host, self.port, self.timeout) 871 self.ftp.login(self.user, self.passwd) 872 for dir in self.dirs: 873 self.ftp.cwd(dir) 874 875 def retrfile(self, file, type): 876 import ftplib 877 self.endtransfer() 878 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 879 else: cmd = 'TYPE ' + type; isdir = 0 880 try: 881 self.ftp.voidcmd(cmd) 882 except ftplib.all_errors: 883 self.init() 884 self.ftp.voidcmd(cmd) 885 conn = None 886 if file and not isdir: 887 # Try to retrieve as a file 888 try: 889 cmd = 'RETR ' + file 890 conn, retrlen = self.ftp.ntransfercmd(cmd) 891 except ftplib.error_perm, reason: 892 if str(reason)[:3] != '550': 893 raise IOError, ('ftp error', reason), sys.exc_info()[2] 894 if not conn: 895 # Set transfer mode to ASCII! 896 self.ftp.voidcmd('TYPE A') 897 # Try a directory listing. Verify that directory exists. 
898 if file: 899 pwd = self.ftp.pwd() 900 try: 901 try: 902 self.ftp.cwd(file) 903 except ftplib.error_perm, reason: 904 raise IOError, ('ftp error', reason), sys.exc_info()[2] 905 finally: 906 self.ftp.cwd(pwd) 907 cmd = 'LIST ' + file 908 else: 909 cmd = 'LIST' 910 conn, retrlen = self.ftp.ntransfercmd(cmd) 911 self.busy = 1 912 ftpobj = addclosehook(conn.makefile('rb'), self.file_close) 913 self.refcount += 1 914 conn.close() 915 # Pass back both a suitably decorated object and a retrieval length 916 return (ftpobj, retrlen) 917 918 def endtransfer(self): 919 if not self.busy: 920 return 921 self.busy = 0 922 try: 923 self.ftp.voidresp() 924 except ftperrors(): 925 pass 926 927 def close(self): 928 self.keepalive = False 929 if self.refcount <= 0: 930 self.real_close() 931 932 def file_close(self): 933 self.endtransfer() 934 self.refcount -= 1 935 if self.refcount <= 0 and not self.keepalive: 936 self.real_close() 937 938 def real_close(self): 939 self.endtransfer() 940 try: 941 self.ftp.close() 942 except ftperrors(): 943 pass 944 945class addbase: 946 """Base class for addinfo and addclosehook.""" 947 948 def __init__(self, fp): 949 self.fp = fp 950 self.read = self.fp.read 951 self.readline = self.fp.readline 952 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines 953 if hasattr(self.fp, "fileno"): 954 self.fileno = self.fp.fileno 955 else: 956 self.fileno = lambda: None 957 if hasattr(self.fp, "__iter__"): 958 self.__iter__ = self.fp.__iter__ 959 if hasattr(self.fp, "next"): 960 self.next = self.fp.next 961 962 def __repr__(self): 963 return '<%s at %r whose fp = %r>' % (self.__class__.__name__, 964 id(self), self.fp) 965 966 def close(self): 967 self.read = None 968 self.readline = None 969 self.readlines = None 970 self.fileno = None 971 if self.fp: self.fp.close() 972 self.fp = None 973 974class addclosehook(addbase): 975 """Class to add a close hook to an open file.""" 976 977 def __init__(self, fp, closehook, *hookargs): 978 
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        # Run the hook exactly once (references are dropped so a second
        # close() is a no-op), then close the file itself.
        if self.closehook:
            self.closehook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
        addbase.close(self)

class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        return self.headers

class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        # HTTP status code, or None for non-HTTP URLs.
        self.code = code

    def info(self):
        return self.headers

    def getcode(self):
        return self.code

    def geturl(self):
        return self.url


# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def')

try:
    unicode
except NameError:
    # Python built without unicode support: nothing is a unicode string.
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if _is_unicode(url):
        try:
            url = url.encode("ASCII")
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:': url = url[4:].strip()
    return url

# All the split* helpers below compile their regex lazily on first use
# and cache it in a module-level global, so importing urllib does not
# pay the cost of importing re.
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        # Normalize: a non-empty path always starts with a slash.
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url

_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        import re
        # Greedy '.*' means everything up to the *last* '@' is userinfo.
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host

_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        import re
        # re.S so a password may contain newline characters.
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None

# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match: return match.group(1, 2)
    return host, None

_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            if not port: raise ValueError, "no digits"
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport

_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None

_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None

# urlparse contains a duplicate of this method to avoid a circular import. If
# you update this method, also update the copy in urlparse. This code
# duplication does not exist in Python3.

# Map every two-hex-digit string (upper/lower/mixed case) to its byte.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    res = s.split('%')
    # fastpath
    if len(res) == 1:
        return s
    s = res[0]
    for item in res[1:]:
        try:
            s += _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid two-digit escape: leave the '%' in place.
            s += '%' + item
        except UnicodeDecodeError:
            # s is a unicode string and the decoded byte is non-ASCII;
            # append the escape as a code point instead of a byte.
            s += unichr(int(item[:2], 16)) + item[2:]
    return s

def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    s = s.replace('+', ' ')
    return unquote(s)

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Precomputed char -> quoted-form table for the default safe set;
# quote() overlays caller-supplied safe characters on a copy.
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache: (safe, always_safe) -> (quoter function, full safe string).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    try:
        (quoter, safe) = _safe_quoters[cachekey]
    except KeyError:
        safe_map = _safe_map.copy()
        safe_map.update([(c, c) for c in safe])
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    # If stripping every safe character empties the string, nothing
    # needs quoting -- return the input unchanged.
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))

def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)

def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
1294 try: 1295 # non-sequence items should not work with len() 1296 # non-empty strings will fail this 1297 if len(query) and not isinstance(query[0], tuple): 1298 raise TypeError 1299 # zero-length sequences of all types will get here and succeed, 1300 # but that's a minor nit - since the original implementation 1301 # allowed empty dicts that type of behavior probably should be 1302 # preserved for consistency 1303 except TypeError: 1304 ty,va,tb = sys.exc_info() 1305 raise TypeError, "not a valid non-string sequence or mapping object", tb 1306 1307 l = [] 1308 if not doseq: 1309 # preserve old behavior 1310 for k, v in query: 1311 k = quote_plus(str(k)) 1312 v = quote_plus(str(v)) 1313 l.append(k + '=' + v) 1314 else: 1315 for k, v in query: 1316 k = quote_plus(str(k)) 1317 if isinstance(v, str): 1318 v = quote_plus(v) 1319 l.append(k + '=' + v) 1320 elif _is_unicode(v): 1321 # is there a reasonable way to convert to ASCII? 1322 # encode generates a string, but "replace" or "ignore" 1323 # lose information and "strict" can raise UnicodeError 1324 v = quote_plus(v.encode("ASCII","replace")) 1325 l.append(k + '=' + v) 1326 else: 1327 try: 1328 # is this a sufficient test for sequence-ness? 1329 len(v) 1330 except TypeError: 1331 # not a sequence 1332 v = quote_plus(str(v)) 1333 l.append(k + '=' + v) 1334 else: 1335 # loop over the sequence 1336 for elt in v: 1337 l.append(k + '=' + quote_plus(str(elt))) 1338 return '&'.join(l) 1339 1340# Proxy handling 1341def getproxies_environment(): 1342 """Return a dictionary of scheme -> proxy server URL mappings. 1343 1344 Scan the environment for variables named <scheme>_proxy; 1345 this seems to be the standard convention. If you need a 1346 different way, you can pass a proxies dictionary to the 1347 [Fancy]URLopener constructor. 
1348 1349 """ 1350 proxies = {} 1351 for name, value in os.environ.items(): 1352 name = name.lower() 1353 if value and name[-6:] == '_proxy': 1354 proxies[name[:-6]] = value 1355 return proxies 1356 1357def proxy_bypass_environment(host): 1358 """Test if proxies should not be used for a particular host. 1359 1360 Checks the environment for a variable named no_proxy, which should 1361 be a list of DNS suffixes separated by commas, or '*' for all hosts. 1362 """ 1363 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 1364 # '*' is special case for always bypass 1365 if no_proxy == '*': 1366 return 1 1367 # strip port off host 1368 hostonly, port = splitport(host) 1369 # check if the host ends with any of the DNS suffixes 1370 no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] 1371 for name in no_proxy_list: 1372 if name and (hostonly.endswith(name) or host.endswith(name)): 1373 return 1 1374 # otherwise, don't bypass 1375 return 0 1376 1377 1378if sys.platform == 'darwin': 1379 from _scproxy import _get_proxy_settings, _get_proxies 1380 1381 def proxy_bypass_macosx_sysconf(host): 1382 """ 1383 Return True iff this host shouldn't be accessed using a proxy 1384 1385 This function uses the MacOSX framework SystemConfiguration 1386 to fetch the proxy information. 1387 """ 1388 import re 1389 import socket 1390 from fnmatch import fnmatch 1391 1392 hostonly, port = splitport(host) 1393 1394 def ip2num(ipAddr): 1395 parts = ipAddr.split('.') 1396 parts = map(int, parts) 1397 if len(parts) != 4: 1398 parts = (parts + [0, 0, 0, 0])[:4] 1399 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 1400 1401 proxy_settings = _get_proxy_settings() 1402 1403 # Check for simple host names: 1404 if '.' 
not in host: 1405 if proxy_settings['exclude_simple']: 1406 return True 1407 1408 hostIP = None 1409 1410 for value in proxy_settings.get('exceptions', ()): 1411 # Items in the list are strings like these: *.local, 169.254/16 1412 if not value: continue 1413 1414 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 1415 if m is not None: 1416 if hostIP is None: 1417 try: 1418 hostIP = socket.gethostbyname(hostonly) 1419 hostIP = ip2num(hostIP) 1420 except socket.error: 1421 continue 1422 1423 base = ip2num(m.group(1)) 1424 mask = m.group(2) 1425 if mask is None: 1426 mask = 8 * (m.group(1).count('.') + 1) 1427 1428 else: 1429 mask = int(mask[1:]) 1430 mask = 32 - mask 1431 1432 if (hostIP >> mask) == (base >> mask): 1433 return True 1434 1435 elif fnmatch(host, value): 1436 return True 1437 1438 return False 1439 1440 def getproxies_macosx_sysconf(): 1441 """Return a dictionary of scheme -> proxy server URL mappings. 1442 1443 This function uses the MacOSX framework SystemConfiguration 1444 to fetch the proxy information. 1445 """ 1446 return _get_proxies() 1447 1448 def proxy_bypass(host): 1449 if getproxies_environment(): 1450 return proxy_bypass_environment(host) 1451 else: 1452 return proxy_bypass_macosx_sysconf(host) 1453 1454 def getproxies(): 1455 return getproxies_environment() or getproxies_macosx_sysconf() 1456 1457elif os.name == 'nt': 1458 def getproxies_registry(): 1459 """Return a dictionary of scheme -> proxy server URL mappings. 1460 1461 Win32 uses the registry to store proxies. 1462 1463 """ 1464 proxies = {} 1465 try: 1466 import _winreg 1467 except ImportError: 1468 # Std module, so should be around - but you never know! 
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            # Translate the glob-style registry entry into a regex.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return whether *host* should be accessed directly (no proxy).

        Uses the environment's no_proxy setting if any proxy environment
        variables are present, and otherwise the Windows registry
        ProxyOverride list.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment

# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 byte values through quote()/unquote(), timing it."""
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
        print repr(s)
        print repr(qs)
        print repr(uqs)
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)