1da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// Mini-benchmark for tsan VTS worst case performance 2da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// Idea: 3da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 1) Spawn M + N threads (M >> N) 4da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// We'll call the 'M' threads as 'garbage threads'. 5da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 2) Make sure all threads have created thus no TIDs were reused 6da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 3) Join the garbage threads 7da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 4) Do many sync operations on the remaining N threads 8da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 9da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// It turns out that due to O(M+N) VTS complexity the (4) is much slower with 10da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// when N is large. 11da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 12da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// Some numbers: 13da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// a) clang++ native O1 with n_iterations=200kk takes 14da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 5s regardless of M 15da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// clang++ tsanv2 O1 with n_iterations=20kk takes 16da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 23.5s with M=200 17da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 11.5s with M=1 18da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// i.e. tsanv2 is ~23x to ~47x slower than native, depends on M. 19da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// b) g++ native O1 with n_iterations=200kk takes 20da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 5.5s regardless of M 21da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// g++ tsanv1 O1 with n_iterations=2kk takes 22da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 39.5s with M=200 23da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 20.5s with M=1 24da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// i.e. tsanv1 is ~370x to ~720x slower than native, depends on M. 25da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 26da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <assert.h> 27da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <pthread.h> 28da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <stdio.h> 29da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <stdlib.h> 30da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 31da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyclass __attribute__((aligned(64))) Mutex { 32da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany public: 33da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany Mutex() { pthread_mutex_init(&m_, NULL); } 34da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany ~Mutex() { pthread_mutex_destroy(&m_); } 35da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany void Lock() { pthread_mutex_lock(&m_); } 36da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany void Unlock() { pthread_mutex_unlock(&m_); } 37da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 38da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany private: 39da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_mutex_t m_; 40da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany}; 41da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 42da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyconst int kNumMutexes = 1024; 43da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya SerebryanyMutex mutexes[kNumMutexes]; 44da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 45da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyint n_threads, n_iterations; 46da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 47da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanypthread_barrier_t all_threads_ready, main_threads_ready; 48da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 49da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyvoid* GarbageThread(void *unused) { 50da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_wait(&all_threads_ready); 51da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany return 0; 52da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany} 53da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 54da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyvoid *Thread(void *arg) { 55da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany long idx = (long)arg; 56da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_wait(&all_threads_ready); 57da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 58da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany // Wait for the main thread to join the garbage threads. 59da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_wait(&main_threads_ready); 60da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 61da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany printf("Thread %ld go!\n", idx); 62da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany int offset = idx * kNumMutexes / n_threads; 63da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany for (int i = 0; i < n_iterations; i++) { 64da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany mutexes[(offset + i) % kNumMutexes].Lock(); 65da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany mutexes[(offset + i) % kNumMutexes].Unlock(); 66da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 67da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany printf("Thread %ld done\n", idx); 68da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany return 0; 69da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany} 70da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 71da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyint main(int argc, char **argv) { 72da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany int n_garbage_threads; 73da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany if (argc == 1) { 74da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany n_threads = 2; 75da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany n_garbage_threads = 200; 76da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany n_iterations = 20000000; 77da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } else if (argc == 4) { 78da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany n_threads = atoi(argv[1]); 79da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany assert(n_threads > 0 && n_threads <= 32); 80da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany n_garbage_threads = atoi(argv[2]); 81da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany assert(n_garbage_threads > 0 && n_garbage_threads <= 16000); 82da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany n_iterations = atoi(argv[3]); 83da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } else { 84da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]); 85da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany return 1; 86da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 87da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n", 88da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany __FILE__, n_threads, n_garbage_threads, n_iterations); 89da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 90da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1); 91da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1); 92da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 93da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_t *t = new pthread_t[n_threads]; 94da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany { 95da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_t *g_t = new pthread_t[n_garbage_threads]; 96da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany for (int i = 0; i < n_garbage_threads; i++) { 97da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany int status = pthread_create(&g_t[i], 0, GarbageThread, NULL); 98da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany assert(status == 0); 99da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 100da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany for (int i = 0; i < n_threads; i++) { 101da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany int status = pthread_create(&t[i], 0, Thread, (void*)i); 102da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany assert(status == 0); 103da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 104da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_wait(&all_threads_ready); 105da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany printf("All threads started! Killing the garbage threads.\n"); 106da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany for (int i = 0; i < n_garbage_threads; i++) { 107da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_join(g_t[i], 0); 108da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 109da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany delete [] g_t; 110da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 111da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany printf("Resuming the main threads.\n"); 112da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_barrier_wait(&main_threads_ready); 113da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 114da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany 115da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany for (int i = 0; i < n_threads; i++) { 116da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany pthread_join(t[i], 0); 117da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany } 118da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany delete [] t; 119da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany return 0; 120da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany} 121