iobench.py revision 31b3763fa304038a0f1d8a0be5e4a2b41026f107
# -*- coding: utf-8 -*-
# This file should be kept compatible with both Python 2.6 and Python >= 3.0.

import time
import os
import re
import sys
import hashlib
import functools
import itertools
from optparse import OptionParser

out = sys.stdout

TEXT_ENCODING = 'utf8'
NEWLINES = 'lf'

# Compatibility
try:
    xrange
except NameError:
    xrange = range

def text_open(fn, mode, encoding=None):
    """Open *fn* as a text file, on both Python 2 and Python 3.

    The encoding-aware open() is tried first, with *encoding* defaulting
    to TEXT_ENCODING.  If open() rejects the `encoding` keyword with a
    TypeError, the file is opened without it.

    A 'U' flag in *mode* is tolerated on every version: it is dropped on
    the encoding-aware path, where universal newlines are already the
    default and where Python 3.11+ rejects the flag with ValueError, and
    it is added to read modes on the Python 2 fallback so that both
    interpreters read text with the same newline handling.
    """
    try:
        return open(fn, mode.replace("U", ""),
                    encoding=encoding or TEXT_ENCODING)
    except TypeError:
        # Only Python 2 needs 'U'; never append it on Python 3, where the
        # TypeError must have come from somewhere else.
        if sys.version_info[0] < 3 and "r" in mode and "U" not in mode:
            mode += "U"
        return open(fn, mode)

def get_file_sizes():
    """Yield (label, size in bytes) for each benchmark file size.

    The label is the human-readable size with the space removed,
    e.g. ('20KB', 20480).
    """
    for s in ['20 KB', '400 KB', '10 MB']:
        size, unit = s.split()
        size = int(size) * {'KB': 1024, 'MB': 1024 ** 2}[unit]
        yield s.replace(' ', ''), size

def get_binary_files():
    """Return an iterable of (filename, size) for the binary test files."""
    return ((name + ".bin", size) for name, size in get_file_sizes())

def get_text_files():
    """Return an iterable of (filename, size) for the text test files.

    The file name embeds the current TEXT_ENCODING and NEWLINES settings.
    """
    return (("%s-%s-%s.txt" % (name, TEXT_ENCODING, NEWLINES), size)
        for name, size in get_file_sizes())

def with_open_mode(mode):
    """Decorator factory: record *mode* as `file_open_mode` on the test."""
    def decorate(f):
        f.file_open_mode = mode
        return f
    return decorate

def with_sizes(*sizes):
    """Decorator factory: record *sizes* as `file_sizes` on the test."""
    def decorate(f):
        f.file_sizes = sizes
        return f
    return decorate


# Here begin the tests
# NOTE: the docstrings of the test functions are printed verbatim as the
# test labels, so they are part of the program's output.

@with_open_mode("r")
@with_sizes("medium")
def read_bytewise(f):
    """ read one unit at a time """
    f.seek(0)
    while f.read(1):
        pass

@with_open_mode("r")
@with_sizes("medium")
def read_small_chunks(f):
    """ read 20 units at a time """
    f.seek(0)
    while f.read(20):
        pass

@with_open_mode("r")
@with_sizes("medium")
def read_big_chunks(f):
    """ read 4096 units at a time """
    f.seek(0)
    while f.read(4096):
        pass

@with_open_mode("r")
@with_sizes("small", "medium", "large")
def read_whole_file(f):
    """ read whole contents at once """
    f.seek(0)
    # The second read() returns an empty result and ends the loop.
    while f.read():
        pass

@with_open_mode("rt")
@with_sizes("medium")
def read_lines(f):
    """ read one line at a time """
    f.seek(0)
    for line in f:
        pass

@with_open_mode("r")
@with_sizes("medium")
def seek_forward_bytewise(f):
    """ seek forward one unit at a time """
    # Seek to the end to learn the size, then walk forward with absolute seeks.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    for i in xrange(0, size - 1):
        f.seek(i, 0)

@with_open_mode("r")
@with_sizes("medium")
def seek_forward_blockwise(f):
    """ seek forward 1000 units at a time """
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    for i in xrange(0, size - 1, 1000):
        f.seek(i, 0)

@with_open_mode("rb")
@with_sizes("medium")
def read_seek_bytewise(f):
    """ alternate read & seek one unit """
    f.seek(0)
    while f.read(1):
        f.seek(1, 1)

@with_open_mode("rb")
@with_sizes("medium")
def read_seek_blockwise(f):
    """ alternate read & seek 1000 units """
    f.seek(0)
    while f.read(1000):
        f.seek(1000, 1)


@with_open_mode("w")
@with_sizes("small")
def write_bytewise(f, source):
    """ write one unit at a time """
    for i in xrange(0, len(source)):
        f.write(source[i:i+1])

@with_open_mode("w")
@with_sizes("medium")
def write_small_chunks(f, source):
    """ write 20 units at a time """
    for i in xrange(0, len(source), 20):
        f.write(source[i:i+20])

@with_open_mode("w")
@with_sizes("medium")
def write_medium_chunks(f, source):
    """ write 4096 units at a time """
    for i in xrange(0, len(source), 4096):
        f.write(source[i:i+4096])

@with_open_mode("w")
@with_sizes("large")
def write_large_chunks(f, source):
    """ write 1e6 units at a time """
    for i in xrange(0, len(source), 1000000):
        f.write(source[i:i+1000000])


@with_open_mode("w+")
@with_sizes("small")
def modify_bytewise(f, source):
    """ modify one unit at a time """
    f.seek(0)
    for i in xrange(0, len(source)):
        f.write(source[i:i+1])

@with_open_mode("w+")
@with_sizes("medium")
def modify_small_chunks(f, source):
    """ modify 20 units at a time """
    f.seek(0)
    for i in xrange(0, len(source), 20):
        f.write(source[i:i+20])

@with_open_mode("w+")
@with_sizes("medium")
def modify_medium_chunks(f, source):
    """ modify 4096 units at a time """
    f.seek(0)
    for i in xrange(0, len(source), 4096):
        f.write(source[i:i+4096])

@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_bytewise(f, source):
    """ alternate write & seek one unit """
    f.seek(0)
    for i in xrange(0, len(source), 2):
        f.write(source[i:i+1])
        f.seek(i+2)

@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_blockwise(f, source):
    """ alternate write & seek 1000 units """
    f.seek(0)
    for i in xrange(0, len(source), 2000):
        f.write(source[i:i+1000])
        f.seek(i+2000)

# XXX the 2 following tests don't work with py3k's text IO
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_bytewise(f, source):
    """ alternate read & write one unit """
    f.seek(0)
    for i in xrange(0, len(source), 2):
        f.read(1)
        f.write(source[i+1:i+2])

@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_blockwise(f, source):
    """ alternate read & write 1000 units """
    f.seek(0)
    for i in xrange(0, len(source), 2000):
        f.read(1000)
        f.write(source[i+1000:i+2000])


# In the tables below, a None entry makes run_test_family() print a blank
# separator line instead of running a test.
read_tests = [
    read_bytewise, read_small_chunks, read_lines, read_big_chunks,
    None, read_whole_file, None,
    seek_forward_bytewise, seek_forward_blockwise,
    read_seek_bytewise, read_seek_blockwise,
]

write_tests = [
    write_bytewise, write_small_chunks, write_medium_chunks, write_large_chunks,
]

modify_tests = [
    modify_bytewise, modify_small_chunks, modify_medium_chunks,
    None,
    modify_seek_forward_bytewise, modify_seek_forward_blockwise,
    read_modify_bytewise, read_modify_blockwise,
]

def run_during(duration, func):
    """Call *func* repeatedly until more than *duration* seconds have passed.

    *func* is always called at least once.  Returns a tuple
    (n, real, cpu): the number of calls, the elapsed real time, and the
    CPU time consumed (sum of the first two os.times() fields).
    """
    _t = time.time
    n = 0
    start = os.times()
    start_timestamp = _t()
    # Use the elapsed-time field of os.times() when it is non-zero,
    # otherwise fall back to the wall clock.
    real_start = start[4] or start_timestamp
    while True:
        func()
        n += 1
        if _t() - start_timestamp > duration:
            break
    end = os.times()
    real = (end[4] if start[4] else time.time()) - real_start
    return n, real, sum(end[0:2]) - sum(start[0:2])

def warm_cache(filename):
    """Read *filename* once in full so later reads are less likely to hit disk."""
    with open(filename, "rb") as f:
        f.read()


def run_all_tests(options):
    """Run every test family selected by *options* and print the results.

    *options* is a string of flags: "r" (read tests), "w" (write and
    modify tests), "b" (binary I/O) and "t" (text I/O).
    """
    def print_label(filename, func):
        # The label shows the size part of the file name and the test's
        # docstring.
        name = re.split(r'[-.]', filename)[0]
        out.write(
            ("[%s] %s... "
                % (name.center(7), func.__doc__.strip())
            ).ljust(52))
        out.flush()

    def print_results(size, n, real, cpu):
        # Bandwidth: n passes over `size` units in `real` seconds.
        bw = n * float(size) / 1024 ** 2 / real
        bw = ("%4d MB/s" if bw > 100 else "%.3g MB/s") % bw
        out.write(bw.rjust(12) + "\n")
        if cpu < 0.90 * real:
            out.write(" warning: test above used only %d%% CPU, "
                "result may be flawed!\n" % (100.0 * cpu / real))

    def run_one_test(name, size, open_func, test_func, *args):
        mode = test_func.file_open_mode
        print_label(name, test_func)
        # Pure write tests never read the file, so there is nothing to warm.
        if "w" not in mode or "+" in mode:
            warm_cache(name)
        with open_func(name) as f:
            n, real, cpu = run_during(1.5, lambda: test_func(f, *args))
        print_results(size, n, real, cpu)

    def run_test_family(tests, mode_filter, files, open_func, *make_args):
        for test_func in tests:
            if test_func is None:
                out.write("\n")
                continue
            # Skip tests whose open mode is specific to the other kind of I/O.
            if mode_filter in test_func.file_open_mode:
                continue
            for s in test_func.file_sizes:
                name, size = files[size_names[s]]
                args = tuple(f(name, size) for f in make_args)
                run_one_test(name, size,
                    open_func, test_func, *args)

    # Index of each size class in the lists built from get_file_sizes().
    size_names = {
        "small": 0,
        "medium": 1,
        "large": 2,
    }

    binary_files = list(get_binary_files())
    text_files = list(get_text_files())
    if "b" in options:
        print("Binary unit = one byte")
    if "t" in options:
        print("Text unit = one character (%s-decoded)" % TEXT_ENCODING)

    # Binary reads
    if "b" in options and "r" in options:
        print("\n** Binary input **\n")
        run_test_family(read_tests, "t", binary_files, lambda fn: open(fn, "rb"))

    # Text reads
    if "t" in options and "r" in options:
        print("\n** Text input **\n")
        run_test_family(read_tests, "b", text_files, lambda fn: text_open(fn, "r"))

    # Binary writes
    if "b" in options and "w" in options:
        print("\n** Binary append **\n")
        def make_test_source(name, size):
            with open(name, "rb") as f:
                return f.read()
        run_test_family(write_tests, "t", binary_files,
            lambda fn: open(os.devnull, "wb"), make_test_source)

    # Text writes
    if "t" in options and "w" in options:
        print("\n** Text append **\n")
        def make_test_source(name, size):
            with text_open(name, "r") as f:
                return f.read()
        run_test_family(write_tests, "b", text_files,
            lambda fn: text_open(os.devnull, "w"), make_test_source)

    # Binary overwrites
    if "b" in options and "w" in options:
        print("\n** Binary overwrite **\n")
        def make_test_source(name, size):
            with open(name, "rb") as f:
                return f.read()
        run_test_family(modify_tests, "t", binary_files,
            lambda fn: open(fn, "r+b"), make_test_source)

    # Text overwrites
    if "t" in options and "w" in options:
        print("\n** Text overwrite **\n")
        def make_test_source(name, size):
            with text_open(name, "r") as f:
                return f.read()
        run_test_family(modify_tests, "b", text_files,
            lambda fn: text_open(fn, "r+"), make_test_source)


def prepare_files():
    """Create the binary and text data files used by the tests.

    A file is left alone when it already exists with exactly the expected
    size.  Binary files are filled with os.urandom() data.  Text files
    are built by repeating the text found after the chunk marker in this
    script, with line endings chosen by NEWLINES and bytes encoded with
    TEXT_ENCODING.

    Raises RuntimeError if the chunk marker cannot be found in the script.
    """
    print("Preparing files...")
    # Binary files
    for name, size in get_binary_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        with open(name, "wb") as f:
            f.write(os.urandom(size))
    # Text files
    # Python 3.11+ rejects the 'U' flag with ValueError, and Python 3 text
    # mode already uses universal newlines; only Python 2 needs the flag.
    source_mode = "rU" if sys.version_info[0] < 3 else "r"
    with text_open(__file__, source_mode, encoding='utf8') as f:
        # Skip everything up to and including the marker line.
        for line in f:
            if line.startswith("# <iobench text chunk marker>"):
                break
        else:
            raise RuntimeError(
                "Couldn't find chunk marker in %s !" % __file__)
        if NEWLINES == "all":
            it = itertools.cycle(["\n", "\r", "\r\n"])
        else:
            it = itertools.repeat(
                {"cr": "\r", "lf": "\n", "crlf": "\r\n"}[NEWLINES])
        chunk = "".join(line.replace("\n", next(it)) for line in f)
        # On Python 2 the lines are byte strings; decode before re-encoding.
        if isinstance(chunk, bytes):
            chunk = chunk.decode('utf8')
        chunk = chunk.encode(TEXT_ENCODING)
    for name, size in get_text_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        head = chunk * (size // len(chunk))
        tail = chunk[:size % len(chunk)]
        # Adjust tail to end on a character boundary
        while True:
            try:
                tail.decode(TEXT_ENCODING)
                break
            except UnicodeDecodeError:
                tail = tail[:-1]
        with open(name, "wb") as f:
            f.write(head)
            f.write(tail)

def main():
    """Parse the command line, prepare the data files and run the tests.

    Sets the module-level TEXT_ENCODING and NEWLINES from the options.
    With neither -r nor -w given, both read and write tests run; with
    neither -t nor -b given, both text and binary tests run.
    """
    global TEXT_ENCODING, NEWLINES

    usage = "usage: %prog [-h|--help] [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-b", "--binary",
                      action="store_true", dest="binary", default=False,
                      help="run binary I/O tests")
    parser.add_option("-t", "--text",
                      action="store_true", dest="text", default=False,
                      help="run text I/O tests")
    parser.add_option("-r", "--read",
                      action="store_true", dest="read", default=False,
                      help="run read tests")
    parser.add_option("-w", "--write",
                      action="store_true", dest="write", default=False,
                      help="run write & modify tests")
    parser.add_option("-E", "--encoding",
                      action="store", dest="encoding", default=None,
                      help="encoding for text tests (default: %s)" % TEXT_ENCODING)
    parser.add_option("-N", "--newlines",
                      action="store", dest="newlines", default='lf',
                      help="line endings for text tests "
                           "(one of: {lf (default), cr, crlf, all})")
    options, args = parser.parse_args()
    if args:
        parser.error("unexpected arguments")
    NEWLINES = options.newlines.lower()
    if NEWLINES not in ('lf', 'cr', 'crlf', 'all'):
        parser.error("invalid 'newlines' option: %r" % NEWLINES)

    test_options = ""
    if options.read:
        test_options += "r"
    if options.write:
        test_options += "w"
    elif not options.read:
        test_options += "rw"
    if options.text:
        test_options += "t"
    if options.binary:
        test_options += "b"
    elif not options.text:
        test_options += "tb"

    if options.encoding:
        TEXT_ENCODING = options.encoding

    prepare_files()
    run_all_tests(test_options)

if __name__ == "__main__":
    main()


# -- This part to exercise text reading. Don't change anything! --
# <iobench text chunk marker>

"""
1.
Gáttir allar,
áðr gangi fram,
um skoðask skyli,
um skyggnast skyli,
því at óvíst er at vita,
hvar óvinir
sitja á fleti fyrir.

2.
Gefendr heilir!
Gestr er inn kominn,
hvar skal sitja sjá?
Mjök er bráðr,
sá er á bröndum skal
síns of freista frama.

3.
Elds er þörf,
þeims inn er kominn
ok á kné kalinn;
matar ok váða
er manni þörf,
þeim er hefr um fjall farit.

4.
Vatns er þörf,
þeim er til verðar kemr,
þerru ok þjóðlaðar,
góðs of æðis,
ef sér geta mætti,
orðs ok endrþögu.

5.
Vits er þörf,
þeim er víða ratar;
dælt er heima hvat;
at augabragði verðr,
sá er ekki kann
ok með snotrum sitr.

6.
At hyggjandi sinni
skyli-t maðr hræsinn vera,
heldr gætinn at geði;
þá er horskr ok þögull
kemr heimisgarða til,
sjaldan verðr víti vörum,
því at óbrigðra vin
fær maðr aldregi
en mannvit mikit.

7.
Inn vari gestr,
er til verðar kemr,
þunnu hljóði þegir,
eyrum hlýðir,
en augum skoðar;
svá nýsisk fróðra hverr fyrir.

8.
Hinn er sæll,
er sér of getr
lof ok líknstafi;
ódælla er við þat,
er maðr eiga skal
annars brjóstum í.
"""

"""
C'est revenir tard, je le sens, sur un sujet trop rebattu et déjà presque oublié. Mon état, qui ne me permet plus aucun travail suivi, mon aversion pour le genre polémique, ont causé ma lenteur à écrire et ma répugnance à publier. J'aurais même tout à fait supprimé ces Lettres, ou plutôt je lie les aurais point écrites, s'il n'eût été question que de moi : Mais ma patrie ne m'est pas tellement devenue étrangère que je puisse voir tranquillement opprimer ses citoyens, surtout lorsqu'ils n'ont compromis leurs droits qu'en défendant ma cause. Je serais le dernier des hommes si dans une telle occasion j'écoutais un sentiment qui n'est plus ni douceur ni patience, mais faiblesse et lâcheté, dans celui qu'il empêche de remplir son devoir.
Rien de moins important pour le public, j'en conviens, que la matière de ces lettres. La constitution d'une petite République, le sort d'un petit particulier, l'exposé de quelques injustices, la réfutation de quelques sophismes ; tout cela n'a rien en soi d'assez considérable pour mériter beaucoup de lecteurs : mais si mes sujets sont petits mes objets sont grands, et dignes de l'attention de tout honnête homme. Laissons Genève à sa place, et Rousseau dans sa dépression ; mais la religion, mais la liberté, la justice ! voilà, qui que vous soyez, ce qui n'est pas au-dessous de vous.
Qu'on ne cherche pas même ici dans le style le dédommagement de l'aridité de la matière. Ceux que quelques traits heureux de ma plume ont si fort irrités trouveront de quoi s'apaiser dans ces lettres, L'honneur de défendre un opprimé eût enflammé mon coeur si j'avais parlé pour un autre. Réduit au triste emploi de me défendre moi-même, j'ai dû me borner à raisonner ; m'échauffer eût été m'avilir. J'aurai donc trouvé grâce en ce point devant ceux qui s'imaginent qu'il est essentiel à la vérité d'être dite froidement ; opinion que pourtant j'ai peine à comprendre. Lorsqu'une vive persuasion nous anime, le moyen d'employer un langage glacé ? Quand Archimède tout transporté courait nu dans les rues de Syracuse, en avait-il moins trouvé la vérité parce qu'il se passionnait pour elle ? Tout au contraire, celui qui la sent ne peut s'abstenir de l'adorer ; celui qui demeure froid ne l'a pas vue.
Quoi qu'il en soit, je prie les lecteurs de vouloir bien mettre à part mon beau style, et d'examiner seulement si je raisonne bien ou mal ; car enfin, de cela seul qu'un auteur s'exprime en bons termes, je ne vois pas comment il peut s'ensuivre que cet auteur ne sait ce qu'il dit.
"""