# urllib.py -- CPython source, revision b42c53e442b211d0ded1d4c9abd18c74d29ed663
1"""Open an arbitrary URL. 2 3See the following document for more info on URLs: 4"Names and Addresses, URIs, URLs, URNs, URCs", at 5http://www.w3.org/pub/WWW/Addressing/Overview.html 6 7See also the HTTP spec (from which the error codes are derived): 8"HTTP - Hypertext Transfer Protocol", at 9http://www.w3.org/pub/WWW/Protocols/ 10 11Related standards and specs: 12- RFC1808: the "relative URL" spec. (authoritative status) 13- RFC1738 - the "URL standard". (authoritative status) 14- RFC1630 - the "URI spec". (informational status) 15 16The object returned by URLopener().open(file) will differ per 17protocol. All you know is that is has methods read(), readline(), 18readlines(), fileno(), close() and info(). The read*(), fileno() 19and close() methods work like those of open files. 20The info() method returns a mimetools.Message object which can be 21used to query various info about the object, if available. 22(mimetools.Message objects are queried with the getheader() method.) 23""" 24 25import string 26import socket 27import os 28import time 29import sys 30from urlparse import urljoin as basejoin 31 32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", 33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", 34 "urlencode", "url2pathname", "pathname2url", "splittag", 35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap", 36 "splittype", "splithost", "splituser", "splitpasswd", "splitport", 37 "splitnport", "splitquery", "splitattr", "splitvalue", 38 "getproxies"] 39 40__version__ = '1.17' # XXX This version is not always updated :-( 41 42MAXFTPCACHE = 10 # Trim the ftp cache beyond this size 43 44# Helper for non-unix systems 45if os.name == 'nt': 46 from nturl2path import url2pathname, pathname2url 47elif os.name == 'riscos': 48 from rourl2path import url2pathname, pathname2url 49else: 50 def url2pathname(pathname): 51 """OS-specific conversion from a relative URL of the 'file' scheme 52 to a file system path; not recommended for 
general use.""" 53 return unquote(pathname) 54 55 def pathname2url(pathname): 56 """OS-specific conversion from a file system path to a relative URL 57 of the 'file' scheme; not recommended for general use.""" 58 return quote(pathname) 59 60# This really consists of two pieces: 61# (1) a class which handles opening of all sorts of URLs 62# (plus assorted utilities etc.) 63# (2) a set of functions for parsing URLs 64# XXX Should these be separated out into different modules? 65 66 67# Shortcut for basic usage 68_urlopener = None 69def urlopen(url, data=None, proxies=None): 70 """Create a file-like object for the specified URL to read from.""" 71 from warnings import warnpy3k 72 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " 73 "favor of urllib2.urlopen()", stacklevel=2) 74 75 global _urlopener 76 if proxies is not None: 77 opener = FancyURLopener(proxies=proxies) 78 elif not _urlopener: 79 opener = FancyURLopener() 80 _urlopener = opener 81 else: 82 opener = _urlopener 83 if data is None: 84 return opener.open(url) 85 else: 86 return opener.open(url, data) 87def urlretrieve(url, filename=None, reporthook=None, data=None): 88 global _urlopener 89 if not _urlopener: 90 _urlopener = FancyURLopener() 91 return _urlopener.retrieve(url, filename, reporthook, data) 92def urlcleanup(): 93 if _urlopener: 94 _urlopener.cleanup() 95 _safe_quoters.clear() 96 ftpcache.clear() 97 98# check for SSL 99try: 100 import ssl 101except: 102 _have_ssl = False 103else: 104 _have_ssl = True 105 106# exception raised when downloaded size does not match content-length 107class ContentTooShortError(IOError): 108 def __init__(self, message, content): 109 IOError.__init__(self, message) 110 self.content = content 111 112ftpcache = {} 113class URLopener: 114 """Class to open URLs. 115 This is a class rather than just a subroutine because we may need 116 more than one set of global protocol-specific options. 
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() is safe even if __init__ never ran.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # proxies: mapping of scheme -> proxy URL; defaults to the
        #          environment-derived mapping from getproxies().
        # x509:    optional 'key_file'/'cert_file' for HTTPS client auth.
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything (hence the bound self.__unlink).
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches on the URL scheme to a matching open_<scheme>()
        method; unknown schemes go to open_unknown[_proxy]().
        """
        fullurl = unwrap(toBytes(fullurl))
        # percent encode url, fixing lame server errors for e.g, like space
        # within url paths.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        # Scheme names may contain '-' (not legal in method names).
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError but keep the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
object.""" 225 url = unwrap(toBytes(url)) 226 if self.tempcache and url in self.tempcache: 227 return self.tempcache[url] 228 type, url1 = splittype(url) 229 if filename is None and (not type or type == 'file'): 230 try: 231 fp = self.open_local_file(url1) 232 hdrs = fp.info() 233 fp.close() 234 return url2pathname(splithost(url1)[1]), hdrs 235 except IOError: 236 pass 237 fp = self.open(url, data) 238 try: 239 headers = fp.info() 240 if filename: 241 tfp = open(filename, 'wb') 242 else: 243 import tempfile 244 garbage, path = splittype(url) 245 garbage, path = splithost(path or "") 246 path, garbage = splitquery(path or "") 247 path, garbage = splitattr(path or "") 248 suffix = os.path.splitext(path)[1] 249 (fd, filename) = tempfile.mkstemp(suffix) 250 self.__tempfiles.append(filename) 251 tfp = os.fdopen(fd, 'wb') 252 try: 253 result = filename, headers 254 if self.tempcache is not None: 255 self.tempcache[url] = result 256 bs = 1024*8 257 size = -1 258 read = 0 259 blocknum = 0 260 if reporthook: 261 if "content-length" in headers: 262 size = int(headers["Content-Length"]) 263 reporthook(blocknum, bs, size) 264 while 1: 265 block = fp.read(bs) 266 if block == "": 267 break 268 read += len(block) 269 tfp.write(block) 270 blocknum += 1 271 if reporthook: 272 reporthook(blocknum, bs, size) 273 finally: 274 tfp.close() 275 finally: 276 fp.close() 277 278 # raise exception if actual size does not match content-length header 279 if size >= 0 and read < size: 280 raise ContentTooShortError("retrieval incomplete: got only %i out " 281 "of %i bytes" % (read, size), result) 282 283 return result 284 285 # Each method named open_<type> knows how to open that type of URL 286 287 def open_http(self, url, data=None): 288 """Use HTTP protocol.""" 289 import httplib 290 user_passwd = None 291 proxy_passwd= None 292 if isinstance(url, str): 293 host, selector = splithost(url) 294 if host: 295 user_passwd, host = splituser(host) 296 host = unquote(host) 297 realhost = host 298 
    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either a selector string (direct access) or a
        (host, full-url) tuple set up by open() when going via a proxy.
        data, if given, is sent as the body of a POST request.
        """
        import httplib
        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined; fall back.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        # Drain the body so the connection shuts down cleanly.
        void = fp.read()
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)
    # open_https is only defined when the ssl module imported successfully.
    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Mirrors open_http(); url is a selector string or a
            (host, full-url) proxy tuple, data an optional POST body.
            """

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            # key_file/cert_file come from the **x509 constructor args.
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        # file://host/... with a non-localhost host is treated as FTP.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Synthesizes Content-Type/Content-Length/Last-modified headers
        from the file's stat info.  Raises IOError if the URL names a
        non-local host.
        """
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        # A host naming this machine (and no explicit port) is still local.
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')
    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are cached in self.ftpcache keyed on
        (user, host, port, directory path); transfer type comes from a
        ';type=X' URL attribute or defaults to 'D' (dir list) / 'I'
        (binary file).
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # An absolute path yields a leading empty component; keep '/' for
        # a doubled leading slash.
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError, preserving the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError, ('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # The last ';' segment is an encoding (e.g. base64) only when it
        # is not an attribute=value parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Build a pseudo HTTP response (headers + blank line + body).
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        f = StringIO(msg)
        headers = mimetools.Message(f, 0)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # auth_cache maps "realm@host" -> (user, passwd); see get_user_passwd.
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: surface it as a 500 rather
            # than recursing forever.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Follow a Location:/URI: header; returns None when neither exists.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE: URLopener.http_error_default raises IOError, so the
        # un-returned calls below terminate this method.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_<scheme>_basic_auth for the current URL type.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same shape as http_error_401, keyed on Proxy-Authenticate.
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request after embedding credentials in the
        # configured http proxy URL.
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already embedded in the proxy host;
        # i doubles as the clear_cache flag for get_user_passwd.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)
    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # HTTPS twin of retry_proxy_http_basic_auth.
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Retry the request with user:passwd@ embedded in the URL itself.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # HTTPS twin of retry_http_basic_auth.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        # Credentials cache keyed on "realm@host" (host lower-cased).
        # A truthy clear_cache drops the stale entry and re-prompts.
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd
    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Emit a newline so the ^C doesn't mangle the prompt line.
            print
            return None, None


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    # Resolved once and cached for the life of the process.
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders


# Utility classes
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=False):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # refcount tracks outstanding file objects handed out by retrfile();
        # keepalive keeps the connection open between transfers.
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        # (Re)connect, log in and change to the target directory chain.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file (or a directory listing); returns
        (wrapped file object, length or None)."""
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection went stale; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 means "not a plain file" -- fall through to LIST.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # file_close() will decrement the refcount when the caller closes.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Consume the end-of-transfer response, if a transfer was active.
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called via addclosehook when a handed-out file object is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass

class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        # Delegate the file-object protocol by copying bound methods;
        # optional ones are only copied when the wrapped object has them.
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Drop the delegated methods so further use fails loudly.
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """File wrapper that invokes a user-supplied hook on close."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if hook:
            # Fire the hook, then drop the references so that a second
            # close() is a no-op.
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None

class addinfo(addbase):
    """File wrapper that also exposes an info() method."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object supplied at construction."""
        return self.headers

class addinfourl(addbase):
    """File wrapper exposing info(), getcode() and geturl() methods."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        """Return the response headers."""
        return self.headers

    def getcode(self):
        """Return the HTTP status code, or None if not applicable."""
        return self.code

    def geturl(self):
        """Return the URL this response was read from."""
        return self.url


# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def')

try:
    unicode
except NameError:
    # No unicode builtin: nothing can be a unicode string.
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)
If that changes, the conversion 1044 # can be relaxed 1045 if _is_unicode(url): 1046 try: 1047 url = url.encode("ASCII") 1048 except UnicodeError: 1049 raise UnicodeError("URL " + repr(url) + 1050 " contains non-ASCII characters") 1051 return url 1052 1053def unwrap(url): 1054 """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 1055 url = url.strip() 1056 if url[:1] == '<' and url[-1:] == '>': 1057 url = url[1:-1].strip() 1058 if url[:4] == 'URL:': url = url[4:].strip() 1059 return url 1060 1061_typeprog = None 1062def splittype(url): 1063 """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 1064 global _typeprog 1065 if _typeprog is None: 1066 import re 1067 _typeprog = re.compile('^([^/:]+):') 1068 1069 match = _typeprog.match(url) 1070 if match: 1071 scheme = match.group(1) 1072 return scheme.lower(), url[len(scheme) + 1:] 1073 return None, url 1074 1075_hostprog = None 1076def splithost(url): 1077 """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 1078 global _hostprog 1079 if _hostprog is None: 1080 import re 1081 _hostprog = re.compile('^//([^/?]*)(.*)$') 1082 1083 match = _hostprog.match(url) 1084 if match: 1085 host_port = match.group(1) 1086 path = match.group(2) 1087 if path and not path.startswith('/'): 1088 path = '/' + path 1089 return host_port, path 1090 return None, url 1091 1092_userprog = None 1093def splituser(host): 1094 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 1095 global _userprog 1096 if _userprog is None: 1097 import re 1098 _userprog = re.compile('^(.*)@(.*)$') 1099 1100 match = _userprog.match(host) 1101 if match: return match.group(1, 2) 1102 return None, host 1103 1104_passwdprog = None 1105def splitpasswd(user): 1106 """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 1107 global _passwdprog 1108 if _passwdprog is None: 1109 import re 1110 _passwdprog = re.compile('^([^:]*):(.*)$',re.S) 1111 1112 match = _passwdprog.match(user) 1113 if match: return 
match.group(1, 2) 1114 return user, None 1115 1116# splittag('/path#tag') --> '/path', 'tag' 1117_portprog = None 1118def splitport(host): 1119 """splitport('host:port') --> 'host', 'port'.""" 1120 global _portprog 1121 if _portprog is None: 1122 import re 1123 _portprog = re.compile('^(.*):([0-9]+)$') 1124 1125 match = _portprog.match(host) 1126 if match: return match.group(1, 2) 1127 return host, None 1128 1129_nportprog = None 1130def splitnport(host, defport=-1): 1131 """Split host and port, returning numeric port. 1132 Return given default port if no ':' found; defaults to -1. 1133 Return numerical port if a valid number are found after ':'. 1134 Return None if ':' but not a valid number.""" 1135 global _nportprog 1136 if _nportprog is None: 1137 import re 1138 _nportprog = re.compile('^(.*):(.*)$') 1139 1140 match = _nportprog.match(host) 1141 if match: 1142 host, port = match.group(1, 2) 1143 try: 1144 if not port: raise ValueError, "no digits" 1145 nport = int(port) 1146 except ValueError: 1147 nport = None 1148 return host, nport 1149 return host, defport 1150 1151_queryprog = None 1152def splitquery(url): 1153 """splitquery('/path?query') --> '/path', 'query'.""" 1154 global _queryprog 1155 if _queryprog is None: 1156 import re 1157 _queryprog = re.compile('^(.*)\?([^?]*)$') 1158 1159 match = _queryprog.match(url) 1160 if match: return match.group(1, 2) 1161 return url, None 1162 1163_tagprog = None 1164def splittag(url): 1165 """splittag('/path#tag') --> '/path', 'tag'.""" 1166 global _tagprog 1167 if _tagprog is None: 1168 import re 1169 _tagprog = re.compile('^(.*)#([^#]*)$') 1170 1171 match = _tagprog.match(url) 1172 if match: return match.group(1, 2) 1173 return url, None 1174 1175def splitattr(url): 1176 """splitattr('/path;attr1=value1;attr2=value2;...') -> 1177 '/path', ['attr1=value1', 'attr2=value2', ...].""" 1178 words = url.split(';') 1179 return words[0], words[1:] 1180 1181_valueprog = None 1182def splitvalue(attr): 1183 
"""splitvalue('attr=value') --> 'attr', 'value'.""" 1184 global _valueprog 1185 if _valueprog is None: 1186 import re 1187 _valueprog = re.compile('^([^=]*)=(.*)$') 1188 1189 match = _valueprog.match(attr) 1190 if match: return match.group(1, 2) 1191 return attr, None 1192 1193# urlparse contains a duplicate of this method to avoid a circular import. If 1194# you update this method, also update the copy in urlparse. This code 1195# duplication does not exist in Python3. 1196 1197_hexdig = '0123456789ABCDEFabcdef' 1198_hextochr = dict((a + b, chr(int(a + b, 16))) 1199 for a in _hexdig for b in _hexdig) 1200 1201def unquote(s): 1202 """unquote('abc%20def') -> 'abc def'.""" 1203 res = s.split('%') 1204 # fastpath 1205 if len(res) == 1: 1206 return s 1207 s = res[0] 1208 for item in res[1:]: 1209 try: 1210 s += _hextochr[item[:2]] + item[2:] 1211 except KeyError: 1212 s += '%' + item 1213 except UnicodeDecodeError: 1214 s += unichr(int(item[:2], 16)) + item[2:] 1215 return s 1216 1217def unquote_plus(s): 1218 """unquote('%7e/abc+def') -> '~/abc def'""" 1219 s = s.replace('+', ' ') 1220 return unquote(s) 1221 1222always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 1223 'abcdefghijklmnopqrstuvwxyz' 1224 '0123456789' '_.-') 1225_safe_map = {} 1226for i, c in zip(xrange(256), str(bytearray(xrange(256)))): 1227 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) 1228_safe_quoters = {} 1229 1230def quote(s, safe='/'): 1231 """quote('abc def') -> 'abc%20def' 1232 1233 Each part of a URL, e.g. the path info, the query, etc., has a 1234 different set of reserved characters that must be quoted. 1235 1236 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists 1237 the following reserved characters. 1238 1239 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 1240 "$" | "," 1241 1242 Each of these characters is reserved in some component of a URL, 1243 but not necessarily in all of them. 
1244 1245 By default, the quote function is intended for quoting the path 1246 section of a URL. Thus, it will not encode '/'. This character 1247 is reserved, but in typical usage the quote function is being 1248 called on a path where the existing slash characters are used as 1249 reserved characters. 1250 """ 1251 # fastpath 1252 if not s: 1253 if s is None: 1254 raise TypeError('None object cannot be quoted') 1255 return s 1256 cachekey = (safe, always_safe) 1257 try: 1258 (quoter, safe) = _safe_quoters[cachekey] 1259 except KeyError: 1260 safe_map = _safe_map.copy() 1261 safe_map.update([(c, c) for c in safe]) 1262 quoter = safe_map.__getitem__ 1263 safe = always_safe + safe 1264 _safe_quoters[cachekey] = (quoter, safe) 1265 if not s.rstrip(safe): 1266 return s 1267 return ''.join(map(quoter, s)) 1268 1269def quote_plus(s, safe=''): 1270 """Quote the query fragment of a URL; replacing ' ' with '+'""" 1271 if ' ' in s: 1272 s = quote(s, safe + ' ') 1273 return s.replace(' ', '+') 1274 return quote(s, safe) 1275 1276def urlencode(query, doseq=0): 1277 """Encode a sequence of two-element tuples or dictionary into a URL query string. 1278 1279 If any values in the query arg are sequences and doseq is true, each 1280 sequence element is converted to a separate parameter. 1281 1282 If the query arg is a sequence of two-element tuples, the order of the 1283 parameters in the output will match the order of parameters in the 1284 input. 1285 """ 1286 1287 if hasattr(query,"items"): 1288 # mapping objects 1289 query = query.items() 1290 else: 1291 # it's a bother at times that strings and string-like objects are 1292 # sequences... 
1293 try: 1294 # non-sequence items should not work with len() 1295 # non-empty strings will fail this 1296 if len(query) and not isinstance(query[0], tuple): 1297 raise TypeError 1298 # zero-length sequences of all types will get here and succeed, 1299 # but that's a minor nit - since the original implementation 1300 # allowed empty dicts that type of behavior probably should be 1301 # preserved for consistency 1302 except TypeError: 1303 ty,va,tb = sys.exc_info() 1304 raise TypeError, "not a valid non-string sequence or mapping object", tb 1305 1306 l = [] 1307 if not doseq: 1308 # preserve old behavior 1309 for k, v in query: 1310 k = quote_plus(str(k)) 1311 v = quote_plus(str(v)) 1312 l.append(k + '=' + v) 1313 else: 1314 for k, v in query: 1315 k = quote_plus(str(k)) 1316 if isinstance(v, str): 1317 v = quote_plus(v) 1318 l.append(k + '=' + v) 1319 elif _is_unicode(v): 1320 # is there a reasonable way to convert to ASCII? 1321 # encode generates a string, but "replace" or "ignore" 1322 # lose information and "strict" can raise UnicodeError 1323 v = quote_plus(v.encode("ASCII","replace")) 1324 l.append(k + '=' + v) 1325 else: 1326 try: 1327 # is this a sufficient test for sequence-ness? 1328 len(v) 1329 except TypeError: 1330 # not a sequence 1331 v = quote_plus(str(v)) 1332 l.append(k + '=' + v) 1333 else: 1334 # loop over the sequence 1335 for elt in v: 1336 l.append(k + '=' + quote_plus(str(elt))) 1337 return '&'.join(l) 1338 1339# Proxy handling 1340def getproxies_environment(): 1341 """Return a dictionary of scheme -> proxy server URL mappings. 1342 1343 Scan the environment for variables named <scheme>_proxy; 1344 this seems to be the standard convention. If you need a 1345 different way, you can pass a proxies dictionary to the 1346 [Fancy]URLopener constructor. 
1347 1348 """ 1349 proxies = {} 1350 for name, value in os.environ.items(): 1351 name = name.lower() 1352 if value and name[-6:] == '_proxy': 1353 proxies[name[:-6]] = value 1354 return proxies 1355 1356def proxy_bypass_environment(host): 1357 """Test if proxies should not be used for a particular host. 1358 1359 Checks the environment for a variable named no_proxy, which should 1360 be a list of DNS suffixes separated by commas, or '*' for all hosts. 1361 """ 1362 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 1363 # '*' is special case for always bypass 1364 if no_proxy == '*': 1365 return 1 1366 # strip port off host 1367 hostonly, port = splitport(host) 1368 # check if the host ends with any of the DNS suffixes 1369 for name in no_proxy.split(','): 1370 if name and (hostonly.endswith(name) or host.endswith(name)): 1371 return 1 1372 # otherwise, don't bypass 1373 return 0 1374 1375 1376if sys.platform == 'darwin': 1377 from _scproxy import _get_proxy_settings, _get_proxies 1378 1379 def proxy_bypass_macosx_sysconf(host): 1380 """ 1381 Return True iff this host shouldn't be accessed using a proxy 1382 1383 This function uses the MacOSX framework SystemConfiguration 1384 to fetch the proxy information. 1385 """ 1386 import re 1387 import socket 1388 from fnmatch import fnmatch 1389 1390 hostonly, port = splitport(host) 1391 1392 def ip2num(ipAddr): 1393 parts = ipAddr.split('.') 1394 parts = map(int, parts) 1395 if len(parts) != 4: 1396 parts = (parts + [0, 0, 0, 0])[:4] 1397 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 1398 1399 proxy_settings = _get_proxy_settings() 1400 1401 # Check for simple host names: 1402 if '.' 
not in host: 1403 if proxy_settings['exclude_simple']: 1404 return True 1405 1406 hostIP = None 1407 1408 for value in proxy_settings.get('exceptions', ()): 1409 # Items in the list are strings like these: *.local, 169.254/16 1410 if not value: continue 1411 1412 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 1413 if m is not None: 1414 if hostIP is None: 1415 try: 1416 hostIP = socket.gethostbyname(hostonly) 1417 hostIP = ip2num(hostIP) 1418 except socket.error: 1419 continue 1420 1421 base = ip2num(m.group(1)) 1422 mask = m.group(2) 1423 if mask is None: 1424 mask = 8 * (m.group(1).count('.') + 1) 1425 1426 else: 1427 mask = int(mask[1:]) 1428 mask = 32 - mask 1429 1430 if (hostIP >> mask) == (base >> mask): 1431 return True 1432 1433 elif fnmatch(host, value): 1434 return True 1435 1436 return False 1437 1438 def getproxies_macosx_sysconf(): 1439 """Return a dictionary of scheme -> proxy server URL mappings. 1440 1441 This function uses the MacOSX framework SystemConfiguration 1442 to fetch the proxy information. 1443 """ 1444 return _get_proxies() 1445 1446 def proxy_bypass(host): 1447 if getproxies_environment(): 1448 return proxy_bypass_environment(host) 1449 else: 1450 return proxy_bypass_macosx_sysconf(host) 1451 1452 def getproxies(): 1453 return getproxies_environment() or getproxies_macosx_sysconf() 1454 1455elif os.name == 'nt': 1456 def getproxies_registry(): 1457 """Return a dictionary of scheme -> proxy server URL mappings. 1458 1459 Win32 uses the registry to store proxies. 1460 1461 """ 1462 proxies = {} 1463 try: 1464 import _winreg 1465 except ImportError: 1466 # Std module, so should be around - but you never know! 
1467 return proxies 1468 try: 1469 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1470 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1471 proxyEnable = _winreg.QueryValueEx(internetSettings, 1472 'ProxyEnable')[0] 1473 if proxyEnable: 1474 # Returned as Unicode but problems if not converted to ASCII 1475 proxyServer = str(_winreg.QueryValueEx(internetSettings, 1476 'ProxyServer')[0]) 1477 if '=' in proxyServer: 1478 # Per-protocol settings 1479 for p in proxyServer.split(';'): 1480 protocol, address = p.split('=', 1) 1481 # See if address has a type:// prefix 1482 import re 1483 if not re.match('^([^/:]+)://', address): 1484 address = '%s://%s' % (protocol, address) 1485 proxies[protocol] = address 1486 else: 1487 # Use one setting for all protocols 1488 if proxyServer[:5] == 'http:': 1489 proxies['http'] = proxyServer 1490 else: 1491 proxies['http'] = 'http://%s' % proxyServer 1492 proxies['https'] = 'https://%s' % proxyServer 1493 proxies['ftp'] = 'ftp://%s' % proxyServer 1494 internetSettings.Close() 1495 except (WindowsError, ValueError, TypeError): 1496 # Either registry key not found etc, or the value in an 1497 # unexpected format. 1498 # proxies already set up to be empty so nothing to do 1499 pass 1500 return proxies 1501 1502 def getproxies(): 1503 """Return a dictionary of scheme -> proxy server URL mappings. 1504 1505 Returns settings gathered from the environment, if specified, 1506 or the registry. 1507 1508 """ 1509 return getproxies_environment() or getproxies_registry() 1510 1511 def proxy_bypass_registry(host): 1512 try: 1513 import _winreg 1514 import re 1515 except ImportError: 1516 # Std modules, so should be around - but you never know! 
1517 return 0 1518 try: 1519 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1520 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1521 proxyEnable = _winreg.QueryValueEx(internetSettings, 1522 'ProxyEnable')[0] 1523 proxyOverride = str(_winreg.QueryValueEx(internetSettings, 1524 'ProxyOverride')[0]) 1525 # ^^^^ Returned as Unicode but problems if not converted to ASCII 1526 except WindowsError: 1527 return 0 1528 if not proxyEnable or not proxyOverride: 1529 return 0 1530 # try to make a host list from name and IP address. 1531 rawHost, port = splitport(host) 1532 host = [rawHost] 1533 try: 1534 addr = socket.gethostbyname(rawHost) 1535 if addr != rawHost: 1536 host.append(addr) 1537 except socket.error: 1538 pass 1539 try: 1540 fqdn = socket.getfqdn(rawHost) 1541 if fqdn != rawHost: 1542 host.append(fqdn) 1543 except socket.error: 1544 pass 1545 # make a check value list from the registry entry: replace the 1546 # '<local>' string by the localhost entry and the corresponding 1547 # canonical entry. 1548 proxyOverride = proxyOverride.split(';') 1549 # now check if we match one of the registry values. 1550 for test in proxyOverride: 1551 if test == '<local>': 1552 if '.' not in rawHost: 1553 return 1 1554 test = test.replace(".", r"\.") # mask dots 1555 test = test.replace("*", r".*") # change glob sequence 1556 test = test.replace("?", r".") # change glob char 1557 for val in host: 1558 # print "%s <--> %s" %( test, val ) 1559 if re.match(test, val, re.I): 1560 return 1 1561 return 0 1562 1563 def proxy_bypass(host): 1564 """Return a dictionary of scheme -> proxy server URL mappings. 1565 1566 Returns settings gathered from the environment, if specified, 1567 or the registry. 
1568 1569 """ 1570 if getproxies_environment(): 1571 return proxy_bypass_environment(host) 1572 else: 1573 return proxy_bypass_registry(host) 1574 1575else: 1576 # By default use environment variables 1577 getproxies = getproxies_environment 1578 proxy_bypass = proxy_bypass_environment 1579 1580# Test and time quote() and unquote() 1581def test1(): 1582 s = '' 1583 for i in range(256): s = s + chr(i) 1584 s = s*4 1585 t0 = time.time() 1586 qs = quote(s) 1587 uqs = unquote(qs) 1588 t1 = time.time() 1589 if uqs != s: 1590 print 'Wrong!' 1591 print repr(s) 1592 print repr(qs) 1593 print repr(uqs) 1594 print round(t1 - t0, 3), 'sec' 1595 1596 1597def reporthook(blocknum, blocksize, totalsize): 1598 # Report during remote transfers 1599 print "Block number: %d, Block size: %d, Total size: %d" % ( 1600 blocknum, blocksize, totalsize) 1601 1602# Test program 1603def test(args=[]): 1604 if not args: 1605 args = [ 1606 '/etc/passwd', 1607 'file:/etc/passwd', 1608 'file://localhost/etc/passwd', 1609 'ftp://ftp.gnu.org/pub/README', 1610 'http://www.python.org/index.html', 1611 ] 1612 if hasattr(URLopener, "open_https"): 1613 args.append('https://synergy.as.cmu.edu/~geek/') 1614 try: 1615 for url in args: 1616 print '-'*10, url, '-'*10 1617 fn, h = urlretrieve(url, None, reporthook) 1618 print fn 1619 if h: 1620 print '======' 1621 for k in h.keys(): print k + ':', h[k] 1622 print '======' 1623 with open(fn, 'rb') as fp: 1624 data = fp.read() 1625 if '\r' in data: 1626 table = string.maketrans("", "") 1627 data = data.translate(table, "\r") 1628 print data 1629 fn, h = None, None 1630 print '-'*40 1631 finally: 1632 urlcleanup() 1633 1634def main(): 1635 import getopt, sys 1636 try: 1637 opts, args = getopt.getopt(sys.argv[1:], "th") 1638 except getopt.error, msg: 1639 print msg 1640 print "Use -h for help" 1641 return 1642 t = 0 1643 for o, a in opts: 1644 if o == '-t': 1645 t = t + 1 1646 if o == '-h': 1647 print "Usage: python urllib.py [-t] [url ...]" 1648 print "-t runs 
self-test;", 1649 print "otherwise, contents of urls are printed" 1650 return 1651 if t: 1652 if t > 1: 1653 test1() 1654 test(args) 1655 else: 1656 if not args: 1657 print "Use -h for help" 1658 for url in args: 1659 print urlopen(url).read(), 1660 1661# Run test program when run as a script 1662if __name__ == '__main__': 1663 main() 1664