1"""Sort performance test.
2
3See main() for command line syntax.
4See tabulate() for output format.
5
6"""
7
8import sys
9import time
10import random
11import marshal
12import tempfile
13import os
14
15td = tempfile.gettempdir()
16
17def randfloats(n):
18    """Return a list of n random floats in [0, 1)."""
19    # Generating floats is expensive, so this writes them out to a file in
20    # a temp directory.  If the file already exists, it just reads them
21    # back in and shuffles them a bit.
22    fn = os.path.join(td, "rr%06d" % n)
23    try:
24        fp = open(fn, "rb")
25    except IOError:
26        r = random.random
27        result = [r() for i in xrange(n)]
28        try:
29            try:
30                fp = open(fn, "wb")
31                marshal.dump(result, fp)
32                fp.close()
33                fp = None
34            finally:
35                if fp:
36                    try:
37                        os.unlink(fn)
38                    except os.error:
39                        pass
40        except IOError, msg:
41            print "can't write", fn, ":", msg
42    else:
43        result = marshal.load(fp)
44        fp.close()
45        # Shuffle it a bit...
46        for i in range(10):
47            i = random.randrange(n)
48            temp = result[:i]
49            del result[:i]
50            temp.reverse()
51            result.extend(temp)
52            del temp
53    assert len(result) == n
54    return result
55
56def flush():
57    sys.stdout.flush()
58
59def doit(L):
60    t0 = time.clock()
61    L.sort()
62    t1 = time.clock()
63    print "%6.2f" % (t1-t0),
64    flush()
65
66def tabulate(r):
67    """Tabulate sort speed for lists of various sizes.
68
69    The sizes are 2**i for i in r (the argument, a list).
70
71    The output displays i, 2**i, and the time to sort arrays of 2**i
72    floating point numbers with the following properties:
73
74    *sort: random data
75    \sort: descending data
76    /sort: ascending data
77    3sort: ascending, then 3 random exchanges
78    +sort: ascending, then 10 random at the end
79    %sort: ascending, then randomly replace 1% of the elements w/ random values
80    ~sort: many duplicates
81    =sort: all equal
82    !sort: worst case scenario
83
84    """
85    cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
86    fmt = ("%2s %7s" + " %6s"*len(cases))
87    print fmt % (("i", "2**i") + cases)
88    for i in r:
89        n = 1 << i
90        L = randfloats(n)
91        print "%2d %7d" % (i, n),
92        flush()
93        doit(L) # *sort
94        L.reverse()
95        doit(L) # \sort
96        doit(L) # /sort
97
98        # Do 3 random exchanges.
99        for dummy in range(3):
100            i1 = random.randrange(n)
101            i2 = random.randrange(n)
102            L[i1], L[i2] = L[i2], L[i1]
103        doit(L) # 3sort
104
105        # Replace the last 10 with random floats.
106        if n >= 10:
107            L[-10:] = [random.random() for dummy in range(10)]
108        doit(L) # +sort
109
110        # Replace 1% of the elements at random.
111        for dummy in xrange(n // 100):
112            L[random.randrange(n)] = random.random()
113        doit(L) # %sort
114
115        # Arrange for lots of duplicates.
116        if n > 4:
117            del L[4:]
118            L = L * (n // 4)
119            # Force the elements to be distinct objects, else timings can be
120            # artificially low.
121            L = map(lambda x: --x, L)
122        doit(L) # ~sort
123        del L
124
125        # All equal.  Again, force the elements to be distinct objects.
126        L = map(abs, [-0.5] * n)
127        doit(L) # =sort
128        del L
129
130        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
131        # for an older implementation of quicksort, which used the median
132        # of the first, last and middle elements as the pivot.
133        half = n // 2
134        L = range(half - 1, -1, -1)
135        L.extend(range(half))
136        # Force to float, so that the timings are comparable.  This is
137        # significantly faster if we leave tham as ints.
138        L = map(float, L)
139        doit(L) # !sort
140        print
141
142def main():
143    """Main program when invoked as a script.
144
145    One argument: tabulate a single row.
146    Two arguments: tabulate a range (inclusive).
147    Extra arguments are used to seed the random generator.
148
149    """
150    # default range (inclusive)
151    k1 = 15
152    k2 = 20
153    if sys.argv[1:]:
154        # one argument: single point
155        k1 = k2 = int(sys.argv[1])
156        if sys.argv[2:]:
157            # two arguments: specify range
158            k2 = int(sys.argv[2])
159            if sys.argv[3:]:
160                # derive random seed from remaining arguments
161                x = 1
162                for a in sys.argv[3:]:
163                    x = 69069 * x + hash(a)
164                random.seed(x)
165    r = range(k1, k2+1)                 # include the end point
166    tabulate(r)
167
168if __name__ == '__main__':
169    main()
170