183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh"""Sort performance test.
283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew HsiehSee main() for command line syntax.
483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew HsiehSee tabulate() for output format.
583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh"""
783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport sys
983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport time
1083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport random
1183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport marshal
1283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport tempfile
1383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport os
1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehtd = tempfile.gettempdir()
1683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef randfloats(n):
1883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """Return a list of n random floats in [0, 1)."""
1983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    # Generating floats is expensive, so this writes them out to a file in
2083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    # a temp directory.  If the file already exists, it just reads them
2183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    # back in and shuffles them a bit.
2283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    fn = os.path.join(td, "rr%06d" % n)
2383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    try:
2483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        fp = open(fn, "rb")
2583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    except IOError:
2683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        r = random.random
2783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        result = [r() for i in xrange(n)]
2883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
2983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            try:
3083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                fp = open(fn, "wb")
3183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                marshal.dump(result, fp)
3283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                fp.close()
3383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                fp = None
3483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            finally:
3583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                if fp:
3683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    try:
3783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                        os.unlink(fn)
3883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    except os.error:
3983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                        pass
4083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except IOError, msg:
4183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            print "can't write", fn, ":", msg
4283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    else:
4383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        result = marshal.load(fp)
4483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        fp.close()
4583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Shuffle it a bit...
4683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for i in range(10):
4783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            i = random.randrange(n)
4883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            temp = result[:i]
4983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            del result[:i]
5083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            temp.reverse()
5183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            result.extend(temp)
5283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            del temp
5383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    assert len(result) == n
5483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    return result
5583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
5683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef flush():
5783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    sys.stdout.flush()
5883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
5983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef doit(L):
6083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    t0 = time.clock()
6183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    L.sort()
6283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    t1 = time.clock()
6383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    print "%6.2f" % (t1-t0),
6483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    flush()
6583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
6683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef tabulate(r):
6783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """Tabulate sort speed for lists of various sizes.
6883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
6983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    The sizes are 2**i for i in r (the argument, a list).
7083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
7183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    The output displays i, 2**i, and the time to sort arrays of 2**i
7283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    floating point numbers with the following properties:
7383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
7483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    *sort: random data
7583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    \sort: descending data
7683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    /sort: ascending data
7783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    3sort: ascending, then 3 random exchanges
7883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    +sort: ascending, then 10 random at the end
7983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    %sort: ascending, then randomly replace 1% of the elements w/ random values
8083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    ~sort: many duplicates
8183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    =sort: all equal
8283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    !sort: worst case scenario
8383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
8483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """
8583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
8683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    fmt = ("%2s %7s" + " %6s"*len(cases))
8783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    print fmt % (("i", "2**i") + cases)
8883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    for i in r:
8983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        n = 1 << i
9083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        L = randfloats(n)
9183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        print "%2d %7d" % (i, n),
9283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        flush()
9383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # *sort
9483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        L.reverse()
9583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # \sort
9683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # /sort
9783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
9883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Do 3 random exchanges.
9983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for dummy in range(3):
10083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            i1 = random.randrange(n)
10183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            i2 = random.randrange(n)
10283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            L[i1], L[i2] = L[i2], L[i1]
10383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # 3sort
10483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
10583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Replace the last 10 with random floats.
10683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if n >= 10:
10783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            L[-10:] = [random.random() for dummy in range(10)]
10883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # +sort
10983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
11083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Replace 1% of the elements at random.
11183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for dummy in xrange(n // 100):
11283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            L[random.randrange(n)] = random.random()
11383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # %sort
11483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
11583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Arrange for lots of duplicates.
11683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if n > 4:
11783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            del L[4:]
11883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            L = L * (n // 4)
11983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            # Force the elements to be distinct objects, else timings can be
12083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            # artificially low.
12183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            L = map(lambda x: --x, L)
12283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # ~sort
12383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        del L
12483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
12583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # All equal.  Again, force the elements to be distinct objects.
12683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        L = map(abs, [-0.5] * n)
12783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # =sort
12883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        del L
12983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
13083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
13183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # for an older implementation of quicksort, which used the median
13283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # of the first, last and middle elements as the pivot.
13383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        half = n // 2
13483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        L = range(half - 1, -1, -1)
13583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        L.extend(range(half))
13683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Force to float, so that the timings are comparable.  This is
13783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # significantly faster if we leave tham as ints.
13883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        L = map(float, L)
13983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        doit(L) # !sort
14083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        print
14183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
14283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef main():
14383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """Main program when invoked as a script.
14483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
14583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    One argument: tabulate a single row.
14683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    Two arguments: tabulate a range (inclusive).
14783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    Extra arguments are used to seed the random generator.
14883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
14983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """
15083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    # default range (inclusive)
15183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    k1 = 15
15283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    k2 = 20
15383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    if sys.argv[1:]:
15483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # one argument: single point
15583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        k1 = k2 = int(sys.argv[1])
15683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if sys.argv[2:]:
15783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            # two arguments: specify range
15883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            k2 = int(sys.argv[2])
15983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            if sys.argv[3:]:
16083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                # derive random seed from remaining arguments
16183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                x = 1
16283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                for a in sys.argv[3:]:
16383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    x = 69069 * x + hash(a)
16483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                random.seed(x)
16583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    r = range(k1, k2+1)                 # include the end point
16683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    tabulate(r)
16783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
16883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehif __name__ == '__main__':
16983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    main()
170