183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh"""Sort performance test. 283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew HsiehSee main() for command line syntax. 483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew HsiehSee tabulate() for output format. 583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh""" 783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport sys 983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport time 1083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport random 1183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport marshal 1283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport tempfile 1383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport os 1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 1583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehtd = tempfile.gettempdir() 1683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef randfloats(n): 1883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """Return a list of n random floats in [0, 1).""" 1983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Generating floats is expensive, so this writes them out to a file in 2083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # a temp directory. If the file already exists, it just reads them 2183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # back in and shuffles them a bit. 2283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fn = os.path.join(td, "rr%06d" % n) 2383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 2483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fp = open(fn, "rb") 2583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh except IOError: 2683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh r = random.random 2783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result = [r() for i in xrange(n)] 2883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 2983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 3083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fp = open(fn, "wb") 3183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh marshal.dump(result, fp) 3283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fp.close() 3383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fp = None 3483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh finally: 3583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if fp: 3683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 3783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh os.unlink(fn) 3883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh except os.error: 3983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh pass 4083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh except IOError, msg: 4183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh print "can't write", fn, ":", msg 4283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 4383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result = marshal.load(fp) 4483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fp.close() 4583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Shuffle it a bit... 4683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for i in range(10): 4783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh i = random.randrange(n) 4883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh temp = result[:i] 4983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del result[:i] 5083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh temp.reverse() 5183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result.extend(temp) 5283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del temp 5383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh assert len(result) == n 5483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return result 5583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 5683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef flush(): 5783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh sys.stdout.flush() 5883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 5983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef doit(L): 6083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh t0 = time.clock() 6183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L.sort() 6283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh t1 = time.clock() 6383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh print "%6.2f" % (t1-t0), 6483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh flush() 6583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 6683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef tabulate(r): 6783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """Tabulate sort speed for lists of various sizes. 6883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 6983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh The sizes are 2**i for i in r (the argument, a list). 7083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 7183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh The output displays i, 2**i, and the time to sort arrays of 2**i 7283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh floating point numbers with the following properties: 7383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 7483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh *sort: random data 7583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh \sort: descending data 7683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh /sort: ascending data 7783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 3sort: ascending, then 3 random exchanges 7883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh +sort: ascending, then 10 random at the end 7983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh %sort: ascending, then randomly replace 1% of the elements w/ random values 8083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh ~sort: many duplicates 8183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh =sort: all equal 8283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh !sort: worst case scenario 8383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 8483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 8583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) 8683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh fmt = ("%2s %7s" + " %6s"*len(cases)) 8783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh print fmt % (("i", "2**i") + cases) 8883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for i in r: 8983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh n = 1 << i 9083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L = randfloats(n) 9183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh print "%2d %7d" % (i, n), 9283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh flush() 9383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # *sort 9483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L.reverse() 9583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # \sort 9683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # /sort 9783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 9883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Do 3 random exchanges. 9983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for dummy in range(3): 10083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh i1 = random.randrange(n) 10183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh i2 = random.randrange(n) 10283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L[i1], L[i2] = L[i2], L[i1] 10383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # 3sort 10483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 10583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Replace the last 10 with random floats. 10683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if n >= 10: 10783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L[-10:] = [random.random() for dummy in range(10)] 10883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # +sort 10983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 11083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Replace 1% of the elements at random. 11183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for dummy in xrange(n // 100): 11283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L[random.randrange(n)] = random.random() 11383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # %sort 11483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 11583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Arrange for lots of duplicates. 11683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if n > 4: 11783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del L[4:] 11883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L = L * (n // 4) 11983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Force the elements to be distinct objects, else timings can be 12083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # artificially low. 12183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L = map(lambda x: --x, L) 12283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # ~sort 12383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del L 12483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 12583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # All equal. Again, force the elements to be distinct objects. 12683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L = map(abs, [-0.5] * n) 12783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # =sort 12883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del L 12983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 13083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case 13183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # for an older implementation of quicksort, which used the median 13283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # of the first, last and middle elements as the pivot. 13383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh half = n // 2 13483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L = range(half - 1, -1, -1) 13583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L.extend(range(half)) 13683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Force to float, so that the timings are comparable. This is 13783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # significantly faster if we leave tham as ints. 13883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh L = map(float, L) 13983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh doit(L) # !sort 14083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh print 14183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 14283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef main(): 14383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """Main program when invoked as a script. 14483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 14583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh One argument: tabulate a single row. 14683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Two arguments: tabulate a range (inclusive). 14783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Extra arguments are used to seed the random generator. 14883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 14983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 15083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # default range (inclusive) 15183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh k1 = 15 15283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh k2 = 20 15383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if sys.argv[1:]: 15483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # one argument: single point 15583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh k1 = k2 = int(sys.argv[1]) 15683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if sys.argv[2:]: 15783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # two arguments: specify range 15883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh k2 = int(sys.argv[2]) 15983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if sys.argv[3:]: 16083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # derive random seed from remaining arguments 16183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh x = 1 16283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for a in sys.argv[3:]: 16383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh x = 69069 * x + hash(a) 16483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh random.seed(x) 16583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh r = range(k1, k2+1) # include the end point 16683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh tabulate(r) 16783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 16883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehif __name__ == '__main__': 16983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh main() 170