urllib.py revision d5e6cf2b152061cdae0164cef2382086c7638bbc
"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol. All you know is that is has methods read(), readline(),
readlines(), fileno(), close() and info(). The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""

import string
import socket
import os
import time
import sys
from urlparse import urljoin as basejoin

__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shortcut for basic usage
_urlopener = None

def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Opens *url* with a module-global FancyURLopener.  A per-call opener
    is used instead when an explicit *proxies* mapping is given (and in
    that case the global opener is deliberately not replaced).
    """
    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)

def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* into a local file via the module-global opener.

    Returns (filename, headers); see URLopener.retrieve() for details.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)

def urlcleanup():
    """Discard temporary files created by previous urlretrieve() calls."""
    if _urlopener:
        _urlopener.cleanup()

# check for SSL
try:
    import ssl
except ImportError:
    # Fix: only a *missing* ssl module means "no SSL support".  The
    # previous bare `except:` swallowed every error (even SystemExit /
    # KeyboardInterrupt), silently disabling HTTPS on unrelated failures.
    _have_ssl = False
else:
    _have_ssl = True

# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised by retrieve() when fewer bytes arrive than Content-Length
    promised; .content holds the (filename, headers) partial result."""

    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content

# Shared default FTP-connection cache; see URLopener.__init__.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level fallback: cleanup() reads this attribute, so it must be
    # defined (and falsy) even if __init__ never completed.
    __tempfiles = None

    # User-Agent value sent with every request.
    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        # NOTE(review): has_key implies a Python-2 mapping; a dict works.
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        # x509 carries optional client-certificate material for HTTPS.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            # Scheme-less URLs are treated as local file paths.
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to open_<scheme>() by name; '-' is not valid in a
        # Python identifier, hence the replace.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError but keep the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum, bs, size)
        after each block; size is -1 when Content-Length is unknown.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # Fast path: a local file requested without an explicit target
        # filename needs no copy at all.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError, msg:
                pass
        fp = self.open(url, data)
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            # Derive a suffix from the URL path so the temp file keeps a
            # recognizable extension.
            import tempfile
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        result = filename, headers
        if self.tempcache is not None:
            self.tempcache[url] = result
        bs = 1024*8
        size = -1
        read = 0
        blocknum = 0
        if reporthook:
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either a selector string (direct case) or a
        (proxyhost, full-url) tuple (proxy case, set up by open()).
        POST is used when data is not None, GET otherwise.
        """
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: url is the (host, fullurl) tuple from open().
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined; fall
            # through to the default handler below.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        # Drain the body so the connection can be torn down cleanly.
        void = fp.read()
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)
    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Mirrors open_http(); only defined when the ssl module
            imported successfully at module load.
            """

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders()
            if data is not None:
                h.send(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        # file://host/... with a non-local host is treated as FTP.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Only serves the file when the URL has no host, or the host
        resolves to this machine; otherwise raises 'not on local host'.
        """
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        # Synthesize HTTP-style headers so callers see a uniform info().
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are cached in self.ftpcache keyed by
        (user, host, port, dirs); transfer type comes from a ';type=X'
        URL attribute, else 'D' (directory) or 'I' (image/binary).
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
for data protocol currently not implemented') 559 # ignore POSTed data 560 # 561 # syntax of data URLs: 562 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 563 # mediatype := [ type "/" subtype ] *( ";" parameter ) 564 # data := *urlchar 565 # parameter := attribute "=" value 566 import mimetools 567 try: 568 from cStringIO import StringIO 569 except ImportError: 570 from StringIO import StringIO 571 try: 572 [type, data] = url.split(',', 1) 573 except ValueError: 574 raise IOError, ('data error', 'bad data URL') 575 if not type: 576 type = 'text/plain;charset=US-ASCII' 577 semi = type.rfind(';') 578 if semi >= 0 and '=' not in type[semi:]: 579 encoding = type[semi+1:] 580 type = type[:semi] 581 else: 582 encoding = '' 583 msg = [] 584 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT', 585 time.gmtime(time.time()))) 586 msg.append('Content-type: %s' % type) 587 if encoding == 'base64': 588 import base64 589 data = base64.decodestring(data) 590 else: 591 data = unquote(data) 592 msg.append('Content-Length: %d' % len(data)) 593 msg.append('') 594 msg.append(data) 595 msg = '\n'.join(msg) 596 f = StringIO(msg) 597 headers = mimetools.Message(f, 0) 598 #f.fileno = None # needed for addinfourl 599 return addinfourl(f, headers, url) 600 601 602class FancyURLopener(URLopener): 603 """Derived class with handlers for errors we can handle (perhaps).""" 604 605 def __init__(self, *args, **kwargs): 606 URLopener.__init__(self, *args, **kwargs) 607 self.auth_cache = {} 608 self.tries = 0 609 self.maxtries = 10 610 611 def http_error_default(self, url, fp, errcode, errmsg, headers): 612 """Default error handling -- don't raise an exception.""" 613 return addinfourl(fp, headers, "http:" + url, errcode) 614 615 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): 616 """Error 302 -- relocated (temporarily).""" 617 self.tries += 1 618 if self.maxtries and self.tries >= self.maxtries: 619 if hasattr(self, "http_error_500"): 620 meth = 
self.http_error_500 621 else: 622 meth = self.http_error_default 623 self.tries = 0 624 return meth(url, fp, 500, 625 "Internal Server Error: Redirect Recursion", headers) 626 result = self.redirect_internal(url, fp, errcode, errmsg, headers, 627 data) 628 self.tries = 0 629 return result 630 631 def redirect_internal(self, url, fp, errcode, errmsg, headers, data): 632 if 'location' in headers: 633 newurl = headers['location'] 634 elif 'uri' in headers: 635 newurl = headers['uri'] 636 else: 637 return 638 void = fp.read() 639 fp.close() 640 # In case the server sent a relative URL, join with original: 641 newurl = basejoin(self.type + ":" + url, newurl) 642 return self.open(newurl) 643 644 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 645 """Error 301 -- also relocated (permanently).""" 646 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 647 648 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): 649 """Error 303 -- also relocated (essentially identical to 302).""" 650 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 651 652 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): 653 """Error 307 -- relocated, but turn POST into error.""" 654 if data is None: 655 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 656 else: 657 return self.http_error_default(url, fp, errcode, errmsg, headers) 658 659 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): 660 """Error 401 -- authentication required. 
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # The un-returned base-class calls below are deliberate: the base
        # http_error_default() raises IOError, aborting this handler.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_<scheme>_basic_auth (self.type set by open()).
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same raise-via-base-class pattern as http_error_401 above.
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:password embedded in the proxy
        # setting; credentials come from get_user_passwd().
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any previous (failed) credentials from the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # HTTPS twin of retry_proxy_http_basic_auth().
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:password embedded in the URL.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # HTTPS twin of retry_http_basic_auth().
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)
    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return cached (user, passwd) for realm@host, prompting if absent.
        clear_cache forces a fresh prompt (used after a failed attempt)."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders


# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        # (Re)connect, log in and change into the target directory chain;
        # also called from retrfile() to revive a dropped connection.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a listing); return (fp, length)."""
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection probably timed out; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 means "not a plain file": fall through to the
                # directory-listing attempt below.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass

class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        self.fp = fp
        # Delegate the file API by aliasing bound methods, only for those
        # the wrapped object actually provides.
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Drop the method aliases so further use fails loudly.
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None

class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if self.closehook:
            self.closehook(*self.hookargs)
            # Run the hook at most once.
            self.closehook = None
            self.hookargs = None
class addinfo(addbase):
    """File-like wrapper that additionally exposes an info() accessor
    returning the headers it was constructed with."""

    def __init__(self, fp, headers):
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        return self.headers

class addinfourl(addbase):
    """File-like wrapper that additionally exposes info(), getcode()
    and geturl() accessors for the headers, HTTP status and URL it was
    constructed with."""

    def __init__(self, fp, headers, url, code=None):
        self.headers = headers
        self.url = url
        self.code = code
        addbase.__init__(self, fp)

    def info(self):
        return self.headers

    def getcode(self):
        return self.code

    def geturl(self):
        return self.url


# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def')

# Probe once, at import time, whether this interpreter has a distinct
# unicode type; pick the matching predicate implementation.
try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)
If that changes, the conversion 1009 # can be relaxed 1010 if _is_unicode(url): 1011 try: 1012 url = url.encode("ASCII") 1013 except UnicodeError: 1014 raise UnicodeError("URL " + repr(url) + 1015 " contains non-ASCII characters") 1016 return url 1017 1018def unwrap(url): 1019 """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 1020 url = url.strip() 1021 if url[:1] == '<' and url[-1:] == '>': 1022 url = url[1:-1].strip() 1023 if url[:4] == 'URL:': url = url[4:].strip() 1024 return url 1025 1026_typeprog = None 1027def splittype(url): 1028 """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 1029 global _typeprog 1030 if _typeprog is None: 1031 import re 1032 _typeprog = re.compile('^([^/:]+):') 1033 1034 match = _typeprog.match(url) 1035 if match: 1036 scheme = match.group(1) 1037 return scheme.lower(), url[len(scheme) + 1:] 1038 return None, url 1039 1040_hostprog = None 1041def splithost(url): 1042 """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 1043 global _hostprog 1044 if _hostprog is None: 1045 import re 1046 _hostprog = re.compile('^//([^/?]*)(.*)$') 1047 1048 match = _hostprog.match(url) 1049 if match: return match.group(1, 2) 1050 return None, url 1051 1052_userprog = None 1053def splituser(host): 1054 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 1055 global _userprog 1056 if _userprog is None: 1057 import re 1058 _userprog = re.compile('^(.*)@(.*)$') 1059 1060 match = _userprog.match(host) 1061 if match: return map(unquote, match.group(1, 2)) 1062 return None, host 1063 1064_passwdprog = None 1065def splitpasswd(user): 1066 """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 1067 global _passwdprog 1068 if _passwdprog is None: 1069 import re 1070 _passwdprog = re.compile('^([^:]*):(.*)$') 1071 1072 match = _passwdprog.match(user) 1073 if match: return match.group(1, 2) 1074 return user, None 1075 1076# splittag('/path#tag') --> '/path', 'tag' 1077_portprog = None 1078def 
splitport(host): 1079 """splitport('host:port') --> 'host', 'port'.""" 1080 global _portprog 1081 if _portprog is None: 1082 import re 1083 _portprog = re.compile('^(.*):([0-9]+)$') 1084 1085 match = _portprog.match(host) 1086 if match: return match.group(1, 2) 1087 return host, None 1088 1089_nportprog = None 1090def splitnport(host, defport=-1): 1091 """Split host and port, returning numeric port. 1092 Return given default port if no ':' found; defaults to -1. 1093 Return numerical port if a valid number are found after ':'. 1094 Return None if ':' but not a valid number.""" 1095 global _nportprog 1096 if _nportprog is None: 1097 import re 1098 _nportprog = re.compile('^(.*):(.*)$') 1099 1100 match = _nportprog.match(host) 1101 if match: 1102 host, port = match.group(1, 2) 1103 try: 1104 if not port: raise ValueError, "no digits" 1105 nport = int(port) 1106 except ValueError: 1107 nport = None 1108 return host, nport 1109 return host, defport 1110 1111_queryprog = None 1112def splitquery(url): 1113 """splitquery('/path?query') --> '/path', 'query'.""" 1114 global _queryprog 1115 if _queryprog is None: 1116 import re 1117 _queryprog = re.compile('^(.*)\?([^?]*)$') 1118 1119 match = _queryprog.match(url) 1120 if match: return match.group(1, 2) 1121 return url, None 1122 1123_tagprog = None 1124def splittag(url): 1125 """splittag('/path#tag') --> '/path', 'tag'.""" 1126 global _tagprog 1127 if _tagprog is None: 1128 import re 1129 _tagprog = re.compile('^(.*)#([^#]*)$') 1130 1131 match = _tagprog.match(url) 1132 if match: return match.group(1, 2) 1133 return url, None 1134 1135def splitattr(url): 1136 """splitattr('/path;attr1=value1;attr2=value2;...') -> 1137 '/path', ['attr1=value1', 'attr2=value2', ...].""" 1138 words = url.split(';') 1139 return words[0], words[1:] 1140 1141_valueprog = None 1142def splitvalue(attr): 1143 """splitvalue('attr=value') --> 'attr', 'value'.""" 1144 global _valueprog 1145 if _valueprog is None: 1146 import re 1147 _valueprog = 
re.compile('^([^=]*)=(.*)$') 1148 1149 match = _valueprog.match(attr) 1150 if match: return match.group(1, 2) 1151 return attr, None 1152 1153_hextochr = dict(('%02x' % i, chr(i)) for i in range(256)) 1154_hextochr.update(('%02X' % i, chr(i)) for i in range(256)) 1155 1156def unquote(s): 1157 """unquote('abc%20def') -> 'abc def'.""" 1158 res = s.split('%') 1159 for i in xrange(1, len(res)): 1160 item = res[i] 1161 try: 1162 res[i] = _hextochr[item[:2]] + item[2:] 1163 except KeyError: 1164 res[i] = '%' + item 1165 except UnicodeDecodeError: 1166 res[i] = unichr(int(item[:2], 16)) + item[2:] 1167 return "".join(res) 1168 1169def unquote_plus(s): 1170 """unquote('%7e/abc+def') -> '~/abc def'""" 1171 s = s.replace('+', ' ') 1172 return unquote(s) 1173 1174always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 1175 'abcdefghijklmnopqrstuvwxyz' 1176 '0123456789' '_.-') 1177_safemaps = {} 1178 1179def quote(s, safe = '/'): 1180 """quote('abc def') -> 'abc%20def' 1181 1182 Each part of a URL, e.g. the path info, the query, etc., has a 1183 different set of reserved characters that must be quoted. 1184 1185 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists 1186 the following reserved characters. 1187 1188 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 1189 "$" | "," 1190 1191 Each of these characters is reserved in some component of a URL, 1192 but not necessarily in all of them. 1193 1194 By default, the quote function is intended for quoting the path 1195 section of a URL. Thus, it will not encode '/'. This character 1196 is reserved, but in typical usage the quote function is being 1197 called on a path where the existing slash characters are used as 1198 reserved characters. 
1199 """ 1200 cachekey = (safe, always_safe) 1201 try: 1202 safe_map = _safemaps[cachekey] 1203 except KeyError: 1204 safe += always_safe 1205 safe_map = {} 1206 for i in range(256): 1207 c = chr(i) 1208 safe_map[c] = (c in safe) and c or ('%%%02X' % i) 1209 _safemaps[cachekey] = safe_map 1210 res = map(safe_map.__getitem__, s) 1211 return ''.join(res) 1212 1213def quote_plus(s, safe = ''): 1214 """Quote the query fragment of a URL; replacing ' ' with '+'""" 1215 if ' ' in s: 1216 s = quote(s, safe + ' ') 1217 return s.replace(' ', '+') 1218 return quote(s, safe) 1219 1220def urlencode(query,doseq=0): 1221 """Encode a sequence of two-element tuples or dictionary into a URL query string. 1222 1223 If any values in the query arg are sequences and doseq is true, each 1224 sequence element is converted to a separate parameter. 1225 1226 If the query arg is a sequence of two-element tuples, the order of the 1227 parameters in the output will match the order of parameters in the 1228 input. 1229 """ 1230 1231 if hasattr(query,"items"): 1232 # mapping objects 1233 query = query.items() 1234 else: 1235 # it's a bother at times that strings and string-like objects are 1236 # sequences... 
1237 try: 1238 # non-sequence items should not work with len() 1239 # non-empty strings will fail this 1240 if len(query) and not isinstance(query[0], tuple): 1241 raise TypeError 1242 # zero-length sequences of all types will get here and succeed, 1243 # but that's a minor nit - since the original implementation 1244 # allowed empty dicts that type of behavior probably should be 1245 # preserved for consistency 1246 except TypeError: 1247 ty,va,tb = sys.exc_info() 1248 raise TypeError, "not a valid non-string sequence or mapping object", tb 1249 1250 l = [] 1251 if not doseq: 1252 # preserve old behavior 1253 for k, v in query: 1254 k = quote_plus(str(k)) 1255 v = quote_plus(str(v)) 1256 l.append(k + '=' + v) 1257 else: 1258 for k, v in query: 1259 k = quote_plus(str(k)) 1260 if isinstance(v, str): 1261 v = quote_plus(v) 1262 l.append(k + '=' + v) 1263 elif _is_unicode(v): 1264 # is there a reasonable way to convert to ASCII? 1265 # encode generates a string, but "replace" or "ignore" 1266 # lose information and "strict" can raise UnicodeError 1267 v = quote_plus(v.encode("ASCII","replace")) 1268 l.append(k + '=' + v) 1269 else: 1270 try: 1271 # is this a sufficient test for sequence-ness? 1272 x = len(v) 1273 except TypeError: 1274 # not a sequence 1275 v = quote_plus(str(v)) 1276 l.append(k + '=' + v) 1277 else: 1278 # loop over the sequence 1279 for elt in v: 1280 l.append(k + '=' + quote_plus(str(elt))) 1281 return '&'.join(l) 1282 1283# Proxy handling 1284def getproxies_environment(): 1285 """Return a dictionary of scheme -> proxy server URL mappings. 1286 1287 Scan the environment for variables named <scheme>_proxy; 1288 this seems to be the standard convention. If you need a 1289 different way, you can pass a proxies dictionary to the 1290 [Fancy]URLopener constructor. 
1291 1292 """ 1293 proxies = {} 1294 for name, value in os.environ.items(): 1295 name = name.lower() 1296 if name == 'no_proxy': 1297 # handled in proxy_bypass_environment 1298 continue 1299 if value and name[-6:] == '_proxy': 1300 proxies[name[:-6]] = value 1301 return proxies 1302 1303def proxy_bypass_environment(host): 1304 """Test if proxies should not be used for a particular host. 1305 1306 Checks the environment for a variable named no_proxy, which should 1307 be a list of DNS suffixes separated by commas, or '*' for all hosts. 1308 """ 1309 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 1310 # '*' is special case for always bypass 1311 if no_proxy == '*': 1312 return 1 1313 # strip port off host 1314 hostonly, port = splitport(host) 1315 # check if the host ends with any of the DNS suffixes 1316 for name in no_proxy.split(','): 1317 if name and (hostonly.endswith(name) or host.endswith(name)): 1318 return 1 1319 # otherwise, don't bypass 1320 return 0 1321 1322 1323if sys.platform == 'darwin': 1324 def getproxies_internetconfig(): 1325 """Return a dictionary of scheme -> proxy server URL mappings. 1326 1327 By convention the mac uses Internet Config to store 1328 proxies. An HTTP proxy, for instance, is stored under 1329 the HttpProxy key. 1330 1331 """ 1332 try: 1333 import ic 1334 except ImportError: 1335 return {} 1336 1337 try: 1338 config = ic.IC() 1339 except ic.error: 1340 return {} 1341 proxies = {} 1342 # HTTP: 1343 if 'UseHTTPProxy' in config and config['UseHTTPProxy']: 1344 try: 1345 value = config['HTTPProxyHost'] 1346 except ic.error: 1347 pass 1348 else: 1349 proxies['http'] = 'http://%s' % value 1350 # FTP: XXX To be done. 1351 # Gopher: XXX To be done. 
1352 return proxies 1353 1354 def proxy_bypass(host): 1355 if getproxies_environment(): 1356 return proxy_bypass_environment(host) 1357 else: 1358 return 0 1359 1360 def getproxies(): 1361 return getproxies_environment() or getproxies_internetconfig() 1362 1363elif os.name == 'nt': 1364 def getproxies_registry(): 1365 """Return a dictionary of scheme -> proxy server URL mappings. 1366 1367 Win32 uses the registry to store proxies. 1368 1369 """ 1370 proxies = {} 1371 try: 1372 import _winreg 1373 except ImportError: 1374 # Std module, so should be around - but you never know! 1375 return proxies 1376 try: 1377 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1378 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1379 proxyEnable = _winreg.QueryValueEx(internetSettings, 1380 'ProxyEnable')[0] 1381 if proxyEnable: 1382 # Returned as Unicode but problems if not converted to ASCII 1383 proxyServer = str(_winreg.QueryValueEx(internetSettings, 1384 'ProxyServer')[0]) 1385 if '=' in proxyServer: 1386 # Per-protocol settings 1387 for p in proxyServer.split(';'): 1388 protocol, address = p.split('=', 1) 1389 # See if address has a type:// prefix 1390 import re 1391 if not re.match('^([^/:]+)://', address): 1392 address = '%s://%s' % (protocol, address) 1393 proxies[protocol] = address 1394 else: 1395 # Use one setting for all protocols 1396 if proxyServer[:5] == 'http:': 1397 proxies['http'] = proxyServer 1398 else: 1399 proxies['http'] = 'http://%s' % proxyServer 1400 proxies['ftp'] = 'ftp://%s' % proxyServer 1401 internetSettings.Close() 1402 except (WindowsError, ValueError, TypeError): 1403 # Either registry key not found etc, or the value in an 1404 # unexpected format. 1405 # proxies already set up to be empty so nothing to do 1406 pass 1407 return proxies 1408 1409 def getproxies(): 1410 """Return a dictionary of scheme -> proxy server URL mappings. 
1411 1412 Returns settings gathered from the environment, if specified, 1413 or the registry. 1414 1415 """ 1416 return getproxies_environment() or getproxies_registry() 1417 1418 def proxy_bypass_registry(host): 1419 try: 1420 import _winreg 1421 import re 1422 except ImportError: 1423 # Std modules, so should be around - but you never know! 1424 return 0 1425 try: 1426 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1427 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1428 proxyEnable = _winreg.QueryValueEx(internetSettings, 1429 'ProxyEnable')[0] 1430 proxyOverride = str(_winreg.QueryValueEx(internetSettings, 1431 'ProxyOverride')[0]) 1432 # ^^^^ Returned as Unicode but problems if not converted to ASCII 1433 except WindowsError: 1434 return 0 1435 if not proxyEnable or not proxyOverride: 1436 return 0 1437 # try to make a host list from name and IP address. 1438 rawHost, port = splitport(host) 1439 host = [rawHost] 1440 try: 1441 addr = socket.gethostbyname(rawHost) 1442 if addr != rawHost: 1443 host.append(addr) 1444 except socket.error: 1445 pass 1446 try: 1447 fqdn = socket.getfqdn(rawHost) 1448 if fqdn != rawHost: 1449 host.append(fqdn) 1450 except socket.error: 1451 pass 1452 # make a check value list from the registry entry: replace the 1453 # '<local>' string by the localhost entry and the corresponding 1454 # canonical entry. 1455 proxyOverride = proxyOverride.split(';') 1456 i = 0 1457 while i < len(proxyOverride): 1458 if proxyOverride[i] == '<local>': 1459 proxyOverride[i:i+1] = ['localhost', 1460 '127.0.0.1', 1461 socket.gethostname(), 1462 socket.gethostbyname( 1463 socket.gethostname())] 1464 i += 1 1465 # print proxyOverride 1466 # now check if we match one of the registry values. 
1467 for test in proxyOverride: 1468 test = test.replace(".", r"\.") # mask dots 1469 test = test.replace("*", r".*") # change glob sequence 1470 test = test.replace("?", r".") # change glob char 1471 for val in host: 1472 # print "%s <--> %s" %( test, val ) 1473 if re.match(test, val, re.I): 1474 return 1 1475 return 0 1476 1477 def proxy_bypass(host): 1478 """Return a dictionary of scheme -> proxy server URL mappings. 1479 1480 Returns settings gathered from the environment, if specified, 1481 or the registry. 1482 1483 """ 1484 if getproxies_environment(): 1485 return proxy_bypass_environment(host) 1486 else: 1487 return proxy_bypass_registry(host) 1488 1489else: 1490 # By default use environment variables 1491 getproxies = getproxies_environment 1492 proxy_bypass = proxy_bypass_environment 1493 1494# Test and time quote() and unquote() 1495def test1(): 1496 s = '' 1497 for i in range(256): s = s + chr(i) 1498 s = s*4 1499 t0 = time.time() 1500 qs = quote(s) 1501 uqs = unquote(qs) 1502 t1 = time.time() 1503 if uqs != s: 1504 print 'Wrong!' 
1505 print repr(s) 1506 print repr(qs) 1507 print repr(uqs) 1508 print round(t1 - t0, 3), 'sec' 1509 1510 1511def reporthook(blocknum, blocksize, totalsize): 1512 # Report during remote transfers 1513 print "Block number: %d, Block size: %d, Total size: %d" % ( 1514 blocknum, blocksize, totalsize) 1515 1516# Test program 1517def test(args=[]): 1518 if not args: 1519 args = [ 1520 '/etc/passwd', 1521 'file:/etc/passwd', 1522 'file://localhost/etc/passwd', 1523 'ftp://ftp.gnu.org/pub/README', 1524 'http://www.python.org/index.html', 1525 ] 1526 if hasattr(URLopener, "open_https"): 1527 args.append('https://synergy.as.cmu.edu/~geek/') 1528 try: 1529 for url in args: 1530 print '-'*10, url, '-'*10 1531 fn, h = urlretrieve(url, None, reporthook) 1532 print fn 1533 if h: 1534 print '======' 1535 for k in h.keys(): print k + ':', h[k] 1536 print '======' 1537 fp = open(fn, 'rb') 1538 data = fp.read() 1539 del fp 1540 if '\r' in data: 1541 table = string.maketrans("", "") 1542 data = data.translate(table, "\r") 1543 print data 1544 fn, h = None, None 1545 print '-'*40 1546 finally: 1547 urlcleanup() 1548 1549def main(): 1550 import getopt, sys 1551 try: 1552 opts, args = getopt.getopt(sys.argv[1:], "th") 1553 except getopt.error, msg: 1554 print msg 1555 print "Use -h for help" 1556 return 1557 t = 0 1558 for o, a in opts: 1559 if o == '-t': 1560 t = t + 1 1561 if o == '-h': 1562 print "Usage: python urllib.py [-t] [url ...]" 1563 print "-t runs self-test;", 1564 print "otherwise, contents of urls are printed" 1565 return 1566 if t: 1567 if t > 1: 1568 test1() 1569 test(args) 1570 else: 1571 if not args: 1572 print "Use -h for help" 1573 for url in args: 1574 print urlopen(url).read(), 1575 1576# Run test program when run as a script 1577if __name__ == '__main__': 1578 main() 1579