urllib.py revision 0564e12367dc10349cd5596bdbe9bbcb44e112c2
# Open an arbitrary URL
#
# See the following document for a tentative description of URLs:
#     Uniform Resource Locators              Tim Berners-Lee
#     INTERNET DRAFT                                    CERN
#     IETF URL Working Group                    14 July 1993
#     draft-ietf-uri-url-01.txt
#
# The object returned by URLopener().open(file) will differ per
# protocol.  All you know is that it has methods read(), readline(),
# readlines(), fileno(), close() and info().  The read*(), fileno()
# and close() methods work like those of open files.
# The info() method returns a mimetools.Message object which can be
# used to query various info about the object, if available.
# (mimetools.Message objects are queried with the getheader() method.)

import string
import socket
import regex
import os
import sys


__version__ = '1.6'

# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    # On other systems a URL path and a file system path are taken
    # to be the same string, so both conversions are the identity.
    def url2pathname(pathname):
        return pathname
    def pathname2url(pathname):
        return pathname

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shortcut for basic usage
# All three functions below share one lazily created FancyURLopener.
_urlopener = None
def urlopen(url, data=None):
    """Open url with the shared opener; pass data on only if given."""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    if data is None:
        return _urlopener.open(url)
    else:
        return _urlopener.open(url, data)
def urlretrieve(url, filename=None):
    """Retrieve url with the shared opener; see URLopener.retrieve()."""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    if filename:
        return _urlopener.retrieve(url, filename)
    else:
        return _urlopener.retrieve(url)
def urlcleanup():
    """Run cleanup() on the shared opener, if one was ever created."""
    if _urlopener:
        _urlopener.cleanup()


# Class to open URLs.
68# This is a class rather than just a subroutine because we may need 69# more than one set of global protocol-specific options. 70# Note -- this is a base class for those who don't want the 71# automatic handling of errors type 302 (relocated) and 401 72# (authorization needed). 73ftpcache = {} 74class URLopener: 75 76 tempcache = None # So close() in __del__() won't fail 77 78 # Constructor 79 def __init__(self, proxies=None): 80 if proxies is None: 81 proxies = getproxies() 82 self.proxies = proxies 83 server_version = "Python-urllib/%s" % __version__ 84 self.addheaders = [('User-agent', server_version)] 85 self.tempcache = None 86 # Undocumented feature: if you assign {} to tempcache, 87 # it is used to cache files retrieved with 88 # self.retrieve(). This is not enabled by default 89 # since it does not work for changing documents (and I 90 # haven't got the logic to check expiration headers 91 # yet). 92 self.ftpcache = ftpcache 93 # Undocumented feature: you can use a different 94 # ftp cache by assigning to the .ftpcache member; 95 # in case you want logically independent URL openers 96 97 def __del__(self): 98 self.close() 99 100 def close(self): 101 self.cleanup() 102 103 def cleanup(self): 104 import os 105 if self.tempcache: 106 for url in self.tempcache.keys(): 107 try: 108 os.unlink(self.tempcache[url][0]) 109 except os.error: 110 pass 111 del self.tempcache[url] 112 113 # Add a header to be used by the HTTP interface only 114 # e.g. 
u.addheader('Accept', 'sound/basic') 115 def addheader(self, *args): 116 self.addheaders.append(args) 117 118 # External interface 119 # Use URLopener().open(file) instead of open(file, 'r') 120 def open(self, fullurl, data=None): 121 fullurl = unwrap(fullurl) 122 type, url = splittype(fullurl) 123 if not type: type = 'file' 124 self.openedurl = '%s:%s' % (type, url) 125 if self.proxies.has_key(type): 126 proxy = self.proxies[type] 127 type, proxy = splittype(proxy) 128 host, selector = splithost(proxy) 129 url = (host, fullurl) # Signal special case to open_*() 130 name = 'open_' + type 131 if '-' in name: 132 import regsub 133 name = regsub.gsub('-', '_', name) 134 if not hasattr(self, name): 135 if data is None: 136 return self.open_unknown(fullurl) 137 else: 138 return self.open_unknown(fullurl, data) 139 try: 140 if data is None: 141 return getattr(self, name)(url) 142 else: 143 return getattr(self, name)(url, data) 144 except socket.error, msg: 145 raise IOError, ('socket error', msg), sys.exc_traceback 146 147 # Overridable interface to open unknown URL type 148 def open_unknown(self, fullurl, data=None): 149 type, url = splittype(fullurl) 150 raise IOError, ('url error', 'unknown url type', type) 151 152 # External interface 153 # retrieve(url) returns (filename, None) for a local object 154 # or (tempfilename, headers) for a remote object 155 def retrieve(self, url, filename=None): 156 if self.tempcache and self.tempcache.has_key(url): 157 return self.tempcache[url] 158 url1 = unwrap(url) 159 self.openedurl = url1 160 if self.tempcache and self.tempcache.has_key(url1): 161 self.tempcache[url] = self.tempcache[url1] 162 return self.tempcache[url1] 163 type, url1 = splittype(url1) 164 if not filename and (not type or type == 'file'): 165 try: 166 fp = self.open_local_file(url1) 167 del fp 168 return url2pathname(splithost(url1)[1]), None 169 except IOError, msg: 170 pass 171 fp = self.open(url) 172 headers = fp.info() 173 if not filename: 174 import tempfile 
175 filename = tempfile.mktemp() 176 result = filename, headers 177 if self.tempcache is not None: 178 self.tempcache[url] = result 179 tfp = open(filename, 'w') 180 bs = 1024*8 181 block = fp.read(bs) 182 while block: 183 tfp.write(block) 184 block = fp.read(bs) 185 del fp 186 del tfp 187 return result 188 189 # Each method named open_<type> knows how to open that type of URL 190 191 # Use HTTP protocol 192 def open_http(self, url, data=None): 193 import httplib 194 if type(url) is type(""): 195 host, selector = splithost(url) 196 user_passwd, host = splituser(host) 197 else: 198 host, selector = url 199 urltype, rest = splittype(selector) 200 if string.lower(urltype) == 'http': 201 realhost, rest = splithost(rest) 202 user_passwd, realhost = splituser(realhost) 203 if user_passwd: 204 selector = "%s://%s%s" % (urltype, 205 realhost, rest) 206 print "proxy via http:", host, selector 207 if not host: raise IOError, ('http error', 'no host given') 208 if user_passwd: 209 import base64 210 auth = string.strip(base64.encodestring(user_passwd)) 211 else: 212 auth = None 213 h = httplib.HTTP(host) 214 if data is not None: 215 h.putrequest('POST', selector) 216 h.putheader('Content-type', 217 'application/x-www-form-urlencoded') 218 h.putheader('Content-length', '%d' % len(data)) 219 else: 220 h.putrequest('GET', selector) 221 if auth: h.putheader('Authorization', 'Basic %s' % auth) 222 for args in self.addheaders: apply(h.putheader, args) 223 h.endheaders() 224 if data is not None: 225 h.send(data + '\r\n') 226 errcode, errmsg, headers = h.getreply() 227 fp = h.getfile() 228 if errcode == 200: 229 return addinfourl(fp, headers, self.openedurl) 230 else: 231 return self.http_error(url, 232 fp, errcode, errmsg, headers) 233 234 # Handle http errors. 
235 # Derived class can override this, or provide specific handlers 236 # named http_error_DDD where DDD is the 3-digit error code 237 def http_error(self, url, fp, errcode, errmsg, headers): 238 # First check if there's a specific handler for this error 239 name = 'http_error_%d' % errcode 240 if hasattr(self, name): 241 method = getattr(self, name) 242 result = method(url, fp, errcode, errmsg, headers) 243 if result: return result 244 return self.http_error_default( 245 url, fp, errcode, errmsg, headers) 246 247 # Default http error handler: close the connection and raises IOError 248 def http_error_default(self, url, fp, errcode, errmsg, headers): 249 void = fp.read() 250 fp.close() 251 raise IOError, ('http error', errcode, errmsg, headers) 252 253 # Use Gopher protocol 254 def open_gopher(self, url): 255 import gopherlib 256 host, selector = splithost(url) 257 if not host: raise IOError, ('gopher error', 'no host given') 258 type, selector = splitgophertype(selector) 259 selector, query = splitquery(selector) 260 selector = unquote(selector) 261 if query: 262 query = unquote(query) 263 fp = gopherlib.send_query(selector, query, host) 264 else: 265 fp = gopherlib.send_selector(selector, host) 266 return addinfourl(fp, noheaders(), self.openedurl) 267 268 # Use local file or FTP depending on form of URL 269 def open_file(self, url): 270 if url[:2] == '//': 271 return self.open_ftp(url) 272 else: 273 return self.open_local_file(url) 274 275 # Use local file 276 def open_local_file(self, url): 277 host, file = splithost(url) 278 if not host: 279 return addinfourl(open(url2pathname(file), 'r'), noheaders(), 'file:'+file) 280 host, port = splitport(host) 281 if not port and socket.gethostbyname(host) in ( 282 localhost(), thishost()): 283 file = unquote(file) 284 return addinfourl(open(url2pathname(file), 'r'), noheaders(), 'file:'+file) 285 raise IOError, ('local file error', 'not on local host') 286 287 # Use FTP protocol 288 def open_ftp(self, url): 289 host, 
path = splithost(url) 290 if not host: raise IOError, ('ftp error', 'no host given') 291 host, port = splitport(host) 292 user, host = splituser(host) 293 if user: user, passwd = splitpasswd(user) 294 else: passwd = None 295 host = socket.gethostbyname(host) 296 if not port: 297 import ftplib 298 port = ftplib.FTP_PORT 299 path, attrs = splitattr(path) 300 dirs = string.splitfields(path, '/') 301 dirs, file = dirs[:-1], dirs[-1] 302 if dirs and not dirs[0]: dirs = dirs[1:] 303 key = (user, host, port, string.joinfields(dirs, '/')) 304 try: 305 if not self.ftpcache.has_key(key): 306 self.ftpcache[key] = \ 307 ftpwrapper(user, passwd, 308 host, port, dirs) 309 if not file: type = 'D' 310 else: type = 'I' 311 for attr in attrs: 312 attr, value = splitvalue(attr) 313 if string.lower(attr) == 'type' and \ 314 value in ('a', 'A', 'i', 'I', 'd', 'D'): 315 type = string.upper(value) 316 return addinfourl(self.ftpcache[key].retrfile(file, type), 317 noheaders(), self.openedurl) 318 except ftperrors(), msg: 319 raise IOError, ('ftp error', msg), sys.exc_traceback 320 321 322# Derived class with handlers for errors we can handle (perhaps) 323class FancyURLopener(URLopener): 324 325 def __init__(self, *args): 326 apply(URLopener.__init__, (self,) + args) 327 self.auth_cache = {} 328 329 # Default error handling -- don't raise an exception 330 def http_error_default(self, url, fp, errcode, errmsg, headers): 331 return addinfourl(fp, headers, self.openedurl) 332 333 # Error 302 -- relocated (temporarily) 334 def http_error_302(self, url, fp, errcode, errmsg, headers): 335 # XXX The server can force infinite recursion here! 
336 if headers.has_key('location'): 337 newurl = headers['location'] 338 elif headers.has_key('uri'): 339 newurl = headers['uri'] 340 else: 341 return 342 void = fp.read() 343 fp.close() 344 return self.open(newurl) 345 346 # Error 301 -- also relocated (permanently) 347 http_error_301 = http_error_302 348 349 # Error 401 -- authentication required 350 # See this URL for a description of the basic authentication scheme: 351 # http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 352 def http_error_401(self, url, fp, errcode, errmsg, headers): 353 if headers.has_key('www-authenticate'): 354 stuff = headers['www-authenticate'] 355 p = regex.compile( 356 '[ \t]*\([^ \t]+\)[ \t]+realm="\([^"]*\)"') 357 if p.match(stuff) >= 0: 358 scheme, realm = p.group(1, 2) 359 if string.lower(scheme) == 'basic': 360 return self.retry_http_basic_auth( 361 url, realm) 362 363 def retry_http_basic_auth(self, url, realm): 364 host, selector = splithost(url) 365 i = string.find(host, '@') + 1 366 host = host[i:] 367 user, passwd = self.get_user_passwd(host, realm, i) 368 if not (user or passwd): return None 369 host = user + ':' + passwd + '@' + host 370 newurl = '//' + host + selector 371 return self.open_http(newurl) 372 373 def get_user_passwd(self, host, realm, clear_cache = 0): 374 key = realm + '@' + string.lower(host) 375 if self.auth_cache.has_key(key): 376 if clear_cache: 377 del self.auth_cache[key] 378 else: 379 return self.auth_cache[key] 380 user, passwd = self.prompt_user_passwd(host, realm) 381 if user or passwd: self.auth_cache[key] = (user, passwd) 382 return user, passwd 383 384 def prompt_user_passwd(self, host, realm): 385 # Override this in a GUI environment! 
386 try: 387 user = raw_input("Enter username for %s at %s: " % 388 (realm, host)) 389 self.echo_off() 390 try: 391 passwd = raw_input( 392 "Enter password for %s in %s at %s: " % 393 (user, realm, host)) 394 finally: 395 self.echo_on() 396 return user, passwd 397 except KeyboardInterrupt: 398 return None, None 399 400 def echo_off(self): 401 import os 402 os.system("stty -echo") 403 404 def echo_on(self): 405 import os 406 print 407 os.system("stty echo") 408 409 410# Utility functions 411 412# Return the IP address of the magic hostname 'localhost' 413_localhost = None 414def localhost(): 415 global _localhost 416 if not _localhost: 417 _localhost = socket.gethostbyname('localhost') 418 return _localhost 419 420# Return the IP address of the current host 421_thishost = None 422def thishost(): 423 global _thishost 424 if not _thishost: 425 _thishost = socket.gethostbyname(socket.gethostname()) 426 return _thishost 427 428# Return the set of errors raised by the FTP class 429_ftperrors = None 430def ftperrors(): 431 global _ftperrors 432 if not _ftperrors: 433 import ftplib 434 _ftperrors = (ftplib.error_reply, 435 ftplib.error_temp, 436 ftplib.error_perm, 437 ftplib.error_proto) 438 return _ftperrors 439 440# Return an empty mimetools.Message object 441_noheaders = None 442def noheaders(): 443 global _noheaders 444 if not _noheaders: 445 import mimetools 446 import StringIO 447 _noheaders = mimetools.Message(StringIO.StringIO(), 0) 448 _noheaders.fp.close() # Recycle file descriptor 449 return _noheaders 450 451 452# Utility classes 453 454# Class used by open_ftp() for cache of open FTP connections 455class ftpwrapper: 456 def __init__(self, user, passwd, host, port, dirs): 457 self.user = unquote(user or '') 458 self.passwd = unquote(passwd or '') 459 self.host = host 460 self.port = port 461 self.dirs = [] 462 for dir in dirs: 463 self.dirs.append(unquote(dir)) 464 self.init() 465 def init(self): 466 import ftplib 467 self.ftp = ftplib.FTP() 468 
self.ftp.connect(self.host, self.port) 469 self.ftp.login(self.user, self.passwd) 470 for dir in self.dirs: 471 self.ftp.cwd(dir) 472 def retrfile(self, file, type): 473 import ftplib 474 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 475 else: cmd = 'TYPE ' + type; isdir = 0 476 try: 477 self.ftp.voidcmd(cmd) 478 except ftplib.all_errors: 479 self.init() 480 self.ftp.voidcmd(cmd) 481 conn = None 482 if file and not isdir: 483 try: 484 cmd = 'RETR ' + file 485 conn = self.ftp.transfercmd(cmd) 486 except ftplib.error_perm, reason: 487 if reason[:3] != '550': 488 raise IOError, ('ftp error', reason), \ 489 sys.exc_traceback 490 if not conn: 491 # Try a directory listing 492 if file: cmd = 'LIST ' + file 493 else: cmd = 'LIST' 494 conn = self.ftp.transfercmd(cmd) 495 return addclosehook(conn.makefile('rb'), self.ftp.voidresp) 496 497# Base class for addinfo and addclosehook 498class addbase: 499 def __init__(self, fp): 500 self.fp = fp 501 self.read = self.fp.read 502 self.readline = self.fp.readline 503 self.readlines = self.fp.readlines 504 self.fileno = self.fp.fileno 505 def __repr__(self): 506 return '<%s at %s whose fp = %s>' % ( 507 self.__class__.__name__, `id(self)`, `self.fp`) 508 def close(self): 509 self.read = None 510 self.readline = None 511 self.readlines = None 512 self.fileno = None 513 if self.fp: self.fp.close() 514 self.fp = None 515 516# Class to add a close hook to an open file 517class addclosehook(addbase): 518 def __init__(self, fp, closehook, *hookargs): 519 addbase.__init__(self, fp) 520 self.closehook = closehook 521 self.hookargs = hookargs 522 def close(self): 523 if self.closehook: 524 apply(self.closehook, self.hookargs) 525 self.closehook = None 526 self.hookargs = None 527 addbase.close(self) 528 529# class to add an info() method to an open file 530class addinfo(addbase): 531 def __init__(self, fp, headers): 532 addbase.__init__(self, fp) 533 self.headers = headers 534 def info(self): 535 return self.headers 536 537# class to add 
# info() and geturl() methods to an open file
class addinfourl(addbase):
    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
    def info(self):
        return self.headers
    def geturl(self):
        return self.url


# Utility to combine a URL with a base URL to form a new URL

def basejoin(base, url):
    """Resolve url against base and return the combined URL.

    A url that already has a type is returned unchanged.  Otherwise
    the type, and if url has none also the host, are inherited from
    base.  A non-absolute path replaces the last component of the
    base path (with the base's tag and query removed); a path that is
    just a '#tag' or '?query' is appended to the base path instead.
    """
    type, path = splittype(url)
    if type:
        # if url is complete (i.e., it contains a type), return it
        return url
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    if host:
        # if url contains host, just inherit type
        if type: return type + '://' + host + path
        else:
            # no type inherited, so url must have started with //
            # just return it
            return url
    host, basepath = splithost(basepath) # inherit host
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            i = len(basepath)
        else:
            # else replace last component
            i = string.rfind(basepath, '/')
        if i < 0:
            # basepath not absolute
            if host:
                # host present, make absolute
                basepath = '/'
            else:
                # else keep non-absolute
                basepath = ''
        else:
            # remove last file component
            basepath = basepath[:i+1]
        path = basepath + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
    else: return path


# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]')
#     --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# splitgophertype('/Xselector') --> 'X', 'selector'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'

# Strip surrounding whitespace, an enclosing <...> pair and a
# leading 'URL:' prefix, in that order.
def unwrap(url):
    url = string.strip(url)
    if url[:1] == '<' and url[-1:] == '>':
        url = string.strip(url[1:-1])
    if url[:4] == 'URL:': url = string.strip(url[4:])
    return url

# Each split*() function below matches its argument against a
# precompiled pattern and returns the two matched groups; when the
# pattern does not match, None is returned for the missing part.

_typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
def splittype(url):
    if _typeprog.match(url) >= 0: return _typeprog.group(1, 2)
    return None, url

_hostprog = regex.compile('^//\([^/]+\)\(.*\)$')
def splithost(url):
    if _hostprog.match(url) >= 0: return _hostprog.group(1, 2)
    return None, url

_userprog = regex.compile('^\([^@]*\)@\(.*\)$')
def splituser(host):
    if _userprog.match(host) >= 0: return _userprog.group(1, 2)
    return None, host

_passwdprog = regex.compile('^\([^:]*\):\(.*\)$')
def splitpasswd(user):
    if _passwdprog.match(user) >= 0: return _passwdprog.group(1, 2)
    return user, None

_portprog = regex.compile('^\(.*\):\([0-9]+\)$')
def splitport(host):
    if _portprog.match(host) >= 0: return _portprog.group(1, 2)
    return host, None

# Split host and port, returning numeric port.
# Return given default port if no ':' found; defaults to -1.
# Return numerical port if a valid number is found after ':'.
# Return None if ':' but not a valid number.
646_nportprog = regex.compile('^\(.*\):\(.*\)$') 647def splitnport(host, defport=-1): 648 if _nportprog.match(host) >= 0: 649 host, port = _nportprog.group(1, 2) 650 try: 651 if not port: raise string.atoi_error, "no digits" 652 nport = string.atoi(port) 653 except string.atoi_error: 654 nport = None 655 return host, nport 656 return host, defport 657 658_queryprog = regex.compile('^\(.*\)\?\([^?]*\)$') 659def splitquery(url): 660 if _queryprog.match(url) >= 0: return _queryprog.group(1, 2) 661 return url, None 662 663_tagprog = regex.compile('^\(.*\)#\([^#]*\)$') 664def splittag(url): 665 if _tagprog.match(url) >= 0: return _tagprog.group(1, 2) 666 return url, None 667 668def splitattr(url): 669 words = string.splitfields(url, ';') 670 return words[0], words[1:] 671 672_valueprog = regex.compile('^\([^=]*\)=\(.*\)$') 673def splitvalue(attr): 674 if _valueprog.match(attr) >= 0: return _valueprog.group(1, 2) 675 return attr, None 676 677def splitgophertype(selector): 678 if selector[:1] == '/' and selector[1:2]: 679 return selector[1], selector[2:] 680 return None, selector 681 682_quoteprog = regex.compile('%[0-9a-fA-F][0-9a-fA-F]') 683def unquote(s): 684 i = 0 685 n = len(s) 686 res = [] 687 while 0 <= i < n: 688 j = _quoteprog.search(s, i) 689 if j < 0: 690 res.append(s[i:]) 691 break 692 res.append(s[i:j] + chr(string.atoi(s[j+1:j+3], 16))) 693 i = j+3 694 return string.joinfields(res, '') 695 696def unquote_plus(s): 697 if '+' in s: 698 import regsub 699 s = regsub.gsub('+', ' ', s) 700 return unquote(s) 701 702always_safe = string.letters + string.digits + '_,.-' 703def quote(s, safe = '/'): 704 safe = always_safe + safe 705 res = [] 706 for c in s: 707 if c in safe: 708 res.append(c) 709 else: 710 res.append('%%%02x' % ord(c)) 711 return string.joinfields(res, '') 712 713def quote_plus(s, safe = '/'): 714 if ' ' in s: 715 import regsub 716 s = regsub.gsub(' ', '+', s) 717 return quote(s, safe + '+') 718 else: 719 return quote(s, safe) 720 721 722# Proxy 
handling 723def getproxies(): 724 """Return a dictionary of protocol scheme -> proxy server URL mappings. 725 726 Scan the environment for variables named <scheme>_proxy; 727 this seems to be the standard convention. If you need a 728 different way, you can pass a proxies dictionary to the 729 [Fancy]URLopener constructor. 730 731 """ 732 proxies = {} 733 for name, value in os.environ.items(): 734 if value and name[-6:] == '_proxy': 735 proxies[name[:-6]] = value 736 return proxies 737 738 739# Test and time quote() and unquote() 740def test1(): 741 import time 742 s = '' 743 for i in range(256): s = s + chr(i) 744 s = s*4 745 t0 = time.time() 746 qs = quote(s) 747 uqs = unquote(qs) 748 t1 = time.time() 749 if uqs != s: 750 print 'Wrong!' 751 print `s` 752 print `qs` 753 print `uqs` 754 print round(t1 - t0, 3), 'sec' 755 756 757# Test program 758def test(): 759 import sys 760 import regsub 761 args = sys.argv[1:] 762 if not args: 763 args = [ 764 '/etc/passwd', 765 'file:/etc/passwd', 766 'file://localhost/etc/passwd', 767 'ftp://ftp.cwi.nl/etc/passwd', 768 'gopher://gopher.cwi.nl/11/', 769 'http://www.cwi.nl/index.html', 770 ] 771 try: 772 for url in args: 773 print '-'*10, url, '-'*10 774 fn, h = urlretrieve(url) 775 print fn, h 776 if h: 777 print '======' 778 for k in h.keys(): print k + ':', h[k] 779 print '======' 780 fp = open(fn, 'r') 781 data = fp.read() 782 del fp 783 print regsub.gsub('\r', '', data) 784 fn, h = None, None 785 print '-'*40 786 finally: 787 urlcleanup() 788 789# Run test program when run as a script 790if __name__ == '__main__': 791## test1() 792 test() 793