# iobench.py revision 31b3763fa304038a0f1d8a0be5e4a2b41026f107
# -*- coding: utf-8 -*-
# This file should be kept compatible with both Python 2.6 and Python >= 3.0.
3
4import time
5import os
6import re
7import sys
8import hashlib
9import functools
10import itertools
11from optparse import OptionParser
12
# Default text codec and newline flavour used by the text benchmarks;
# main() rebinds both from the -E/--encoding and -N/--newlines options.
TEXT_ENCODING = 'utf8'
NEWLINES = 'lf'

# Stream that benchmark labels and results are written to.
out = sys.stdout
17
# Compatibility: when the name xrange does not exist (the lookup raises
# NameError), alias it to range so the tests below can call xrange()
# regardless of the interpreter version.
try:
    xrange
except NameError:
    xrange = range
23
def text_open(fn, mode, encoding=None):
    """Open *fn* with *mode*, requesting text decoding when possible.

    The codec is *encoding*, or TEXT_ENCODING when *encoding* is empty or
    None.  If open() raises TypeError for the ``encoding`` keyword
    (presumably the Python 2 built-in, which does not accept it), the file
    is opened with the mode alone.

    Returns the open file object.
    """
    codec = encoding or TEXT_ENCODING
    try:
        handle = open(fn, mode, encoding=codec)
    except TypeError:
        handle = open(fn, mode)
    return handle
29
def get_file_sizes():
    """Yield a (label, byte_count) pair for each benchmark file size.

    The label is the human-readable size with its space removed (for
    example '20KB'); the byte count uses 1024-based units.
    """
    multipliers = {'KB': 1024, 'MB': 1024 ** 2}
    for spec in ('20 KB', '400 KB', '10 MB'):
        amount, unit = spec.split()
        yield spec.replace(' ', ''), int(amount) * multipliers[unit]
35
def get_binary_files():
    """Return an iterator of (filename, size) pairs for the binary files.

    Each filename is a size label from get_file_sizes() with ".bin"
    appended; the size is passed through unchanged.
    """
    for label, nbytes in get_file_sizes():
        yield label + ".bin", nbytes
38
def get_text_files():
    """Return an iterator of (filename, size) pairs for the text files.

    Each filename combines a size label from get_file_sizes() with the
    current TEXT_ENCODING and NEWLINES settings, which are read when each
    item is produced.
    """
    for label, nbytes in get_file_sizes():
        filename = "%s-%s-%s.txt" % (label, TEXT_ENCODING, NEWLINES)
        yield filename, nbytes
42
def with_open_mode(mode):
    """Return a decorator that records *mode* on the decorated function.

    The mode string is stored as the ``file_open_mode`` attribute and the
    function itself is returned, not a wrapper.
    """
    def tag(func):
        setattr(func, "file_open_mode", mode)
        return func
    return tag
48
def with_sizes(*sizes):
    """Return a decorator that records the given size names on a function.

    The names are stored, as a tuple, in the ``file_sizes`` attribute and
    the function itself is returned, not a wrapper.
    """
    def tag(func):
        setattr(func, "file_sizes", sizes)
        return func
    return tag
54
55
# The benchmark tests begin here
57
@with_open_mode("r")
@with_sizes("medium")
def read_bytewise(f):
    """ read one unit at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind, then read single units until read() returns an empty result.
    f.seek(0)
    while True:
        if not f.read(1):
            break
65
@with_open_mode("r")
@with_sizes("medium")
def read_small_chunks(f):
    """ read 20 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind, then read 20-unit chunks until read() returns an empty result.
    f.seek(0)
    while True:
        if not f.read(20):
            break
73
@with_open_mode("r")
@with_sizes("medium")
def read_big_chunks(f):
    """ read 4096 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind, then read 4096-unit chunks until read() returns an empty
    # result.
    f.seek(0)
    while True:
        if not f.read(4096):
            break
81
@with_open_mode("r")
@with_sizes("small", "medium", "large")
def read_whole_file(f):
    """ read whole contents at once """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind and call read() without a size; the loop ends as soon as a
    # read() call returns an empty result.
    f.seek(0)
    while True:
        if not f.read():
            break
89
@with_open_mode("rt")
@with_sizes("medium")
def read_lines(f):
    """ read one line at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # The "t" in the declared mode makes the runner skip this test in the
    # binary families.
    f.seek(0)
    # Iterate over the file object and discard every line.
    for _line in f:
        pass
97
@with_open_mode("r")
@with_sizes("medium")
def seek_forward_bytewise(f):
    """ seek forward one unit at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Find the end position by seeking to the end of the file (whence=2).
    f.seek(0, 2)
    end = f.tell()
    f.seek(0, 0)
    # Absolute seeks to offsets 0 .. end - 2, one step at a time.
    for offset in xrange(end - 1):
        f.seek(offset, 0)
107
@with_open_mode("r")
@with_sizes("medium")
def seek_forward_blockwise(f):
    """ seek forward 1000 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Find the end position by seeking to the end of the file (whence=2).
    f.seek(0, 2)
    end = f.tell()
    f.seek(0, 0)
    # Absolute seeks to every 1000th offset below end - 1.
    for offset in xrange(0, end - 1, 1000):
        f.seek(offset, 0)
117
@with_open_mode("rb")
@with_sizes("medium")
def read_seek_bytewise(f):
    """ alternate read & seek one unit """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Read one unit, then skip one with a relative seek (whence=1), until
    # read() returns an empty result.
    f.seek(0)
    while True:
        if not f.read(1):
            break
        f.seek(1, 1)
125
@with_open_mode("rb")
@with_sizes("medium")
def read_seek_blockwise(f):
    """ alternate read & seek 1000 units """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Read 1000 units, then skip 1000 with a relative seek (whence=1),
    # until read() returns an empty result.
    f.seek(0)
    while True:
        if not f.read(1000):
            break
        f.seek(1000, 1)
133
134
@with_open_mode("w")
@with_sizes("small")
def write_bytewise(f, source):
    """ write one unit at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Emit *source* through one write() call per unit-long slice.
    for pos in xrange(len(source)):
        f.write(source[pos:pos + 1])
141
@with_open_mode("w")
@with_sizes("medium")
def write_small_chunks(f, source):
    """ write 20 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Emit *source* through one write() call per 20-unit slice.
    for start in xrange(0, len(source), 20):
        f.write(source[start:start + 20])
148
@with_open_mode("w")
@with_sizes("medium")
def write_medium_chunks(f, source):
    """ write 4096 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Emit *source* through one write() call per 4096-unit slice.
    for start in xrange(0, len(source), 4096):
        f.write(source[start:start + 4096])
155
@with_open_mode("w")
@with_sizes("large")
def write_large_chunks(f, source):
    """ write 1e6 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Emit *source* through one write() call per 1000000-unit slice.
    for start in xrange(0, len(source), 1000000):
        f.write(source[start:start + 1000000])
162
163
@with_open_mode("w+")
@with_sizes("small")
def modify_bytewise(f, source):
    """ modify one unit at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind, then write *source* over the file one unit-long slice per
    # write() call.
    f.seek(0)
    for pos in xrange(len(source)):
        f.write(source[pos:pos + 1])
171
@with_open_mode("w+")
@with_sizes("medium")
def modify_small_chunks(f, source):
    """ modify 20 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind, then write *source* over the file in 20-unit slices.
    f.seek(0)
    for start in xrange(0, len(source), 20):
        f.write(source[start:start + 20])
179
@with_open_mode("w+")
@with_sizes("medium")
def modify_medium_chunks(f, source):
    """ modify 4096 units at a time """
    # The docstring above is printed as the benchmark label; keep it as is.
    # Rewind, then write *source* over the file in 4096-unit slices.
    f.seek(0)
    for start in xrange(0, len(source), 4096):
        f.write(source[start:start + 4096])
187
@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_bytewise(f, source):
    """ alternate write & seek one unit """
    # The docstring above is printed as the benchmark label; keep it as is.
    # For every even offset: write the unit found there in *source*, then
    # seek (absolute) two units past that offset.
    f.seek(0)
    for pos in xrange(0, len(source), 2):
        f.write(source[pos:pos + 1])
        f.seek(pos + 2)
196
@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_blockwise(f, source):
    """ alternate write & seek 1000 units """
    # The docstring above is printed as the benchmark label; keep it as is.
    # For every 2000th offset: write the 1000 units found there in
    # *source*, then seek (absolute) 2000 units past that offset.
    f.seek(0)
    for start in xrange(0, len(source), 2000):
        f.write(source[start:start + 1000])
        f.seek(start + 2000)
205
# XXX this test and the next one (read_modify_blockwise) don't work with
# py3k's text IO; their declared mode contains "b", which makes the runner
# skip them in the text families.
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_bytewise(f, source):
    """ alternate read & write one unit """
    # The docstring above is printed as the benchmark label; keep it as is.
    # For every even offset: read one unit, then write the unit of *source*
    # that sits right after that offset.
    f.seek(0)
    for pos in xrange(0, len(source), 2):
        f.read(1)
        f.write(source[pos + 1:pos + 2])
215
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_blockwise(f, source):
    """ alternate read & write 1000 units """
    # The docstring above is printed as the benchmark label; keep it as is.
    # For every 2000th offset: read 1000 units, then write the 1000 units
    # of *source* that follow them.
    f.seek(0)
    for start in xrange(0, len(source), 2000):
        f.read(1000)
        f.write(source[start + 1000:start + 2000])
224
225
# Benchmark line-ups, in the order they are run.  A None entry is not a
# test: run_all_tests() writes an empty line to the output in its place.
read_tests = [
    read_bytewise,
    read_small_chunks,
    read_lines,
    read_big_chunks,
    None,
    read_whole_file,
    None,
    seek_forward_bytewise,
    seek_forward_blockwise,
    read_seek_bytewise,
    read_seek_blockwise,
]

write_tests = [
    write_bytewise,
    write_small_chunks,
    write_medium_chunks,
    write_large_chunks,
]

modify_tests = [
    modify_bytewise,
    modify_small_chunks,
    modify_medium_chunks,
    None,
    modify_seek_forward_bytewise,
    modify_seek_forward_blockwise,
    read_modify_bytewise,
    read_modify_blockwise,
]
243
def run_during(duration, func):
    """Call *func* repeatedly until more than *duration* seconds have passed.

    *func* is always called at least once; the deadline is checked with
    time.time() after each call.

    Returns a tuple (n, real, cpu):
      - n: number of calls made;
      - real: elapsed time, taken from the fifth os.times() field when that
        field is non-zero at the start, otherwise from time.time();
      - cpu: increase of the sum of the first two os.times() fields
        (user and system CPU time).
    """
    clock = time.time
    times_before = os.times()
    wall_start = clock()
    # Some platforms report 0 for the elapsed field; use the wall clock then.
    elapsed_origin = times_before[4] or wall_start
    iterations = 0
    keep_going = True
    while keep_going:
        func()
        iterations += 1
        keep_going = not (clock() - wall_start > duration)
    times_after = os.times()
    if times_before[4]:
        elapsed_end = times_after[4]
    else:
        elapsed_end = time.time()
    cpu_used = sum(times_after[0:2]) - sum(times_before[0:2])
    return iterations, elapsed_end - elapsed_origin, cpu_used
258
def warm_cache(filename):
    """Read the whole of *filename* in binary mode and discard the data.

    Called before tests that read existing data, presumably so that the
    file is already in the operating system's cache when timing starts.
    """
    handle = open(filename, "rb")
    try:
        handle.read()
    finally:
        handle.close()
262
263
def run_all_tests(options):
    """Run every benchmark family selected by *options* and print results.

    *options* is a collection of one-letter flags (main() passes a string):
    "b" and "t" select the binary and text families, "r" selects the read
    tests and "w" the write ("append") and modify ("overwrite") tests.
    Section headers go through print(); labels and results are written to
    ``out``.
    """
    # Position of each symbolic size in the lists of benchmark files.
    size_names = {
        "small": 0,
        "medium": 1,
        "large": 2,
    }

    def print_label(filename, func):
        # The label shows the part of the file name before the first "-" or
        # "." (the size) followed by the test's docstring.
        size_label = re.split(r'[-.]', filename)[0]
        text = "[%s] %s... " % (size_label.center(7), func.__doc__.strip())
        out.write(text.ljust(52))
        out.flush()

    def print_results(size, n, real, cpu):
        # Throughput: n passes over `size` units, in MB per elapsed second.
        bw = n * float(size) / 1024 ** 2 / real
        if bw > 100:
            shown = "%4d MB/s" % bw
        else:
            shown = "%.3g MB/s" % bw
        out.write(shown.rjust(12) + "\n")
        if cpu < 0.90 * real:
            out.write("   warning: test above used only %d%% CPU, "
                "result may be flawed!\n" % (100.0 * cpu / real))

    def run_one_test(name, size, open_func, test_func, *args):
        mode = test_func.file_open_mode
        print_label(name, test_func)
        # Pre-read the file unless the test is write-only ("w" without "+").
        if "+" in mode or "w" not in mode:
            warm_cache(name)
        with open_func(name) as f:
            n, real, cpu = run_during(1.5, lambda: test_func(f, *args))
        print_results(size, n, real, cpu)

    def run_test_family(tests, mode_filter, files, open_func, *make_args):
        for test_func in tests:
            if test_func is None:
                # Separator entry: just leave a blank line in the report.
                out.write("\n")
            elif mode_filter not in test_func.file_open_mode:
                for size_key in test_func.file_sizes:
                    name, size = files[size_names[size_key]]
                    # Extra positional arguments (e.g. the data to write)
                    # are built per file by the make_args callables.
                    args = [make(name, size) for make in make_args]
                    run_one_test(name, size, open_func, test_func, *args)

    def read_binary_source(name, size):
        # Data to write in binary tests: the raw contents of the file.
        with open(name, "rb") as f:
            return f.read()

    def read_text_source(name, size):
        # Data to write in text tests: the decoded contents of the file.
        with text_open(name, "r") as f:
            return f.read()

    binary_files = list(get_binary_files())
    text_files = list(get_text_files())

    do_binary = "b" in options
    do_text = "t" in options
    do_read = "r" in options
    do_write = "w" in options

    if do_binary:
        print("Binary unit = one byte")
    if do_text:
        print("Text unit = one character (%s-decoded)" % TEXT_ENCODING)

    # Binary reads
    if do_binary and do_read:
        print("\n** Binary input **\n")
        run_test_family(read_tests, "t", binary_files,
            lambda fn: open(fn, "rb"))

    # Text reads
    if do_text and do_read:
        print("\n** Text input **\n")
        run_test_family(read_tests, "b", text_files,
            lambda fn: text_open(fn, "r"))

    # Binary writes (the output goes to os.devnull)
    if do_binary and do_write:
        print("\n** Binary append **\n")
        run_test_family(write_tests, "t", binary_files,
            lambda fn: open(os.devnull, "wb"), read_binary_source)

    # Text writes (the output goes to os.devnull)
    if do_text and do_write:
        print("\n** Text append **\n")
        run_test_family(write_tests, "b", text_files,
            lambda fn: text_open(os.devnull, "w"), read_text_source)

    # Binary overwrites
    if do_binary and do_write:
        print("\n** Binary overwrite **\n")
        run_test_family(modify_tests, "t", binary_files,
            lambda fn: open(fn, "r+b"), read_binary_source)

    # Text overwrites
    if do_text and do_write:
        print("\n** Text overwrite **\n")
        run_test_family(modify_tests, "b", text_files,
            lambda fn: text_open(fn, "r+"), read_text_source)
362
363
def prepare_files():
    """Create the benchmark data files that are missing or have a wrong size.

    Binary files are filled with os.urandom() data.  Text files are built by
    repeating the text that follows the "# <iobench text chunk marker>" line
    of this very source file, with its newlines rewritten according to
    NEWLINES and the result encoded with TEXT_ENCODING.

    Raises RuntimeError if the marker line cannot be found in this file, or
    if no text follows it.
    """
    print("Preparing files...")
    # Binary files
    for name, size in get_binary_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        with open(name, "wb") as f:
            f.write(os.urandom(size))
    # Text files
    # The "U" mode flag was removed in Python 3.11, where open() rejects it
    # with ValueError; Python 3 text mode translates newlines by default
    # anyway.  Keep "rU" for Python 2 only, where it is what enables
    # universal newlines.
    source_mode = "r" if sys.version_info[0] >= 3 else "rU"
    with text_open(__file__, source_mode, encoding='utf8') as f:
        # Skip everything up to and including the marker line.
        for line in f:
            if line.startswith("# <iobench text chunk marker>"):
                break
        else:
            raise RuntimeError(
                "Couldn't find chunk marker in %s !" % __file__)
        if NEWLINES == "all":
            it = itertools.cycle(["\n", "\r", "\r\n"])
        else:
            it = itertools.repeat(
                {"cr": "\r", "lf": "\n", "crlf": "\r\n"}[NEWLINES])
        # The rest of the file is the sample text; give each line the
        # requested line ending.
        chunk = "".join(line.replace("\n", next(it)) for line in f)
        if isinstance(chunk, bytes):
            # Python 2 without decoding support yields byte strings.
            chunk = chunk.decode('utf8')
        chunk = chunk.encode(TEXT_ENCODING)
    if not chunk:
        # Without this guard the size arithmetic below would fail with an
        # unhelpful ZeroDivisionError.
        raise RuntimeError(
            "No text found after chunk marker in %s !" % __file__)
    for name, size in get_text_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        head = chunk * (size // len(chunk))
        tail = chunk[:size % len(chunk)]
        # Adjust tail to end on a character boundary.  NOTE: this can make
        # the file slightly shorter than `size`, in which case the size
        # check above regenerates it on the next run.
        while True:
            try:
                tail.decode(TEXT_ENCODING)
                break
            except UnicodeDecodeError:
                tail = tail[:-1]
        with open(name, "wb") as f:
            f.write(head)
            f.write(tail)
405
def main():
    """Parse the command line, prepare the data files and run the benchmarks.

    Rebinds the module-level NEWLINES and TEXT_ENCODING settings from the
    -N/--newlines and -E/--encoding options.  Invalid options, including an
    encoding name unknown to the codec registry, are reported through
    parser.error(), which prints a usage message and exits.
    """
    global TEXT_ENCODING, NEWLINES
    # Local import: only needed here, to validate the --encoding option.
    import codecs

    usage = "usage: %prog [-h|--help] [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-b", "--binary",
                      action="store_true", dest="binary", default=False,
                      help="run binary I/O tests")
    parser.add_option("-t", "--text",
                      action="store_true", dest="text", default=False,
                      help="run text I/O tests")
    parser.add_option("-r", "--read",
                      action="store_true", dest="read", default=False,
                      help="run read tests")
    parser.add_option("-w", "--write",
                      action="store_true", dest="write", default=False,
                      help="run write & modify tests")
    parser.add_option("-E", "--encoding",
                      action="store", dest="encoding", default=None,
                      help="encoding for text tests (default: %s)" % TEXT_ENCODING)
    parser.add_option("-N", "--newlines",
                      action="store", dest="newlines", default='lf',
                      help="line endings for text tests "
                           "(one of: {lf (default), cr, crlf, all})")
    options, args = parser.parse_args()
    if args:
        parser.error("unexpected arguments")
    NEWLINES = options.newlines.lower()
    if NEWLINES not in ('lf', 'cr', 'crlf', 'all'):
        parser.error("invalid 'newlines' option: %r" % NEWLINES)
    if options.encoding:
        # Reject unknown codecs up front, like invalid newlines, instead of
        # failing with a LookupError traceback once files are being written.
        try:
            codecs.lookup(options.encoding)
        except LookupError:
            parser.error("invalid 'encoding' option: %r" % options.encoding)
        TEXT_ENCODING = options.encoding

    # Build the flag string understood by run_all_tests().  When neither
    # option of a pair is given, both members of the pair are enabled.
    test_options = ""
    if options.read:
        test_options += "r"
    if options.write:
        test_options += "w"
    elif not options.read:
        test_options += "rw"
    if options.text:
        test_options += "t"
    if options.binary:
        test_options += "b"
    elif not options.text:
        test_options += "tb"

    prepare_files()
    run_all_tests(test_options)
456
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
459
460
461# -- This part to exercise text reading. Don't change anything! --
462# <iobench text chunk marker>
463
464"""
4651.
466Gáttir allar,
467áðr gangi fram,
468um skoðask skyli,
469um skyggnast skyli,
470því at óvíst er at vita,
471hvar óvinir
472sitja á fleti fyrir.
473
4742.
475Gefendr heilir!
476Gestr er inn kominn,
477hvar skal sitja sjá?
478Mjök er bráðr,
479sá er á bröndum skal
480síns of freista frama.
481
4823.
483Elds er þörf,
484þeims inn er kominn
485ok á kné kalinn;
486matar ok váða
487er manni þörf,
488þeim er hefr um fjall farit.
489
4904.
491Vatns er þörf,
492þeim er til verðar kemr,
493þerru ok þjóðlaðar,
494góðs of æðis,
495ef sér geta mætti,
496orðs ok endrþögu.
497
4985.
499Vits er þörf,
500þeim er víða ratar;
501dælt er heima hvat;
502at augabragði verðr,
503sá er ekki kann
504ok með snotrum sitr.
505
5066.
507At hyggjandi sinni
508skyli-t maðr hræsinn vera,
509heldr gætinn at geði;
510þá er horskr ok þögull
511kemr heimisgarða til,
512sjaldan verðr víti vörum,
513því at óbrigðra vin
514fær maðr aldregi
515en mannvit mikit.
516
5177.
518Inn vari gestr,
519er til verðar kemr,
520þunnu hljóði þegir,
521eyrum hlýðir,
522en augum skoðar;
523svá nýsisk fróðra hverr fyrir.
524
5258.
526Hinn er sæll,
527er sér of getr
528lof ok líknstafi;
529ódælla er við þat,
530er maðr eiga skal
531annars brjóstum í.
532"""
533
534"""
535C'est revenir tard, je le sens, sur un sujet trop rebattu et déjà presque oublié. Mon état, qui ne me permet plus aucun travail suivi, mon aversion pour le genre polémique, ont causé ma lenteur à écrire et ma répugnance à publier. J'aurais même tout à fait supprimé ces Lettres, ou plutôt je lie les aurais point écrites, s'il n'eût été question que de moi : Mais ma patrie ne m'est pas tellement devenue étrangère que je puisse voir tranquillement opprimer ses citoyens, surtout lorsqu'ils n'ont compromis leurs droits qu'en défendant ma cause. Je serais le dernier des hommes si dans une telle occasion j'écoutais un sentiment qui n'est plus ni douceur ni patience, mais faiblesse et lâcheté, dans celui qu'il empêche de remplir son devoir.
536Rien de moins important pour le public, j'en conviens, que la matière de ces lettres. La constitution d'une petite République, le sort d'un petit particulier, l'exposé de quelques injustices, la réfutation de quelques sophismes ; tout cela n'a rien en soi d'assez considérable pour mériter beaucoup de lecteurs : mais si mes sujets sont petits mes objets sont grands, et dignes de l'attention de tout honnête homme. Laissons Genève à sa place, et Rousseau dans sa dépression ; mais la religion, mais la liberté, la justice ! voilà, qui que vous soyez, ce qui n'est pas au-dessous de vous.
537Qu'on ne cherche pas même ici dans le style le dédommagement de l'aridité de la matière. Ceux que quelques traits heureux de ma plume ont si fort irrités trouveront de quoi s'apaiser dans ces lettres, L'honneur de défendre un opprimé eût enflammé mon coeur si j'avais parlé pour un autre. Réduit au triste emploi de me défendre moi-même, j'ai dû me borner à raisonner ; m'échauffer eût été m'avilir. J'aurai donc trouvé grâce en ce point devant ceux qui s'imaginent qu'il est essentiel à la vérité d'être dite froidement ; opinion que pourtant j'ai peine à comprendre. Lorsqu'une vive persuasion nous anime, le moyen d'employer un langage glacé ? Quand Archimède tout transporté courait nu dans les rues de Syracuse, en avait-il moins trouvé la vérité parce qu'il se passionnait pour elle ? Tout au contraire, celui qui la sent ne peut s'abstenir de l'adorer ; celui qui demeure froid ne l'a pas vue.
538Quoi qu'il en soit, je prie les lecteurs de vouloir bien mettre à part mon beau style, et d'examiner seulement si je raisonne bien ou mal ; car enfin, de cela seul qu'un auteur s'exprime en bons termes, je ne vois pas comment il peut s'ensuivre que cet auteur ne sait ce qu'il dit.
539"""
540