1da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// Mini-benchmark for tsan VTS worst case performance
2da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// Idea:
3da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 1) Spawn M + N threads (M >> N)
4da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//    We'll call the 'M' threads as 'garbage threads'.
5da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 2) Make sure all threads have created thus no TIDs were reused
6da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 3) Join the garbage threads
7da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// 4) Do many sync operations on the remaining N threads
8da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//
9da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// It turns out that due to O(M+N) VTS complexity the (4) is much slower with
10da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// when N is large.
11da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//
12da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// Some numbers:
13da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// a) clang++ native O1 with n_iterations=200kk takes
14da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//      5s regardless of M
15da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//    clang++ tsanv2 O1 with n_iterations=20kk takes
16da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//      23.5s with M=200
17da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//      11.5s with M=1
18da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//    i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
19da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany// b) g++ native O1 with n_iterations=200kk takes
20da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//      5.5s regardless of M
21da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//    g++ tsanv1 O1 with n_iterations=2kk takes
22da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//      39.5s with M=200
23da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//      20.5s with M=1
24da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany//    i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
25da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
26da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <assert.h>
27da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <pthread.h>
28da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <stdio.h>
29da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany#include <stdlib.h>
30da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
31da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyclass __attribute__((aligned(64))) Mutex {
32da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany public:
33da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  Mutex()  { pthread_mutex_init(&m_, NULL); }
34da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  ~Mutex() { pthread_mutex_destroy(&m_); }
35da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  void Lock() { pthread_mutex_lock(&m_); }
36da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  void Unlock() { pthread_mutex_unlock(&m_); }
37da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
38da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany private:
39da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_mutex_t m_;
40da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany};
41da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
42da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyconst int kNumMutexes = 1024;
43da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya SerebryanyMutex mutexes[kNumMutexes];
44da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
45da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyint n_threads, n_iterations;
46da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
47da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanypthread_barrier_t all_threads_ready, main_threads_ready;
48da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
49da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyvoid* GarbageThread(void *unused) {
50da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_barrier_wait(&all_threads_ready);
51da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  return 0;
52da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany}
53da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
54da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyvoid *Thread(void *arg) {
55da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  long idx = (long)arg;
56da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_barrier_wait(&all_threads_ready);
57da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
58da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  // Wait for the main thread to join the garbage threads.
59da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_barrier_wait(&main_threads_ready);
60da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
61da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  printf("Thread %ld go!\n", idx);
62da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  int offset = idx * kNumMutexes / n_threads;
63da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  for (int i = 0; i < n_iterations; i++) {
64da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    mutexes[(offset + i) % kNumMutexes].Lock();
65da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    mutexes[(offset + i) % kNumMutexes].Unlock();
66da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  }
67da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  printf("Thread %ld done\n", idx);
68da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  return 0;
69da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany}
70da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
71da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryanyint main(int argc, char **argv) {
72da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  int n_garbage_threads;
73da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  if (argc == 1) {
74da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    n_threads = 2;
75da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    n_garbage_threads = 200;
76da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    n_iterations = 20000000;
77da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  } else if (argc == 4) {
78da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    n_threads = atoi(argv[1]);
79da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    assert(n_threads > 0 && n_threads <= 32);
80da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    n_garbage_threads = atoi(argv[2]);
81da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
82da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    n_iterations = atoi(argv[3]);
83da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  } else {
84da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
85da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    return 1;
86da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  }
87da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
88da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany         __FILE__, n_threads, n_garbage_threads, n_iterations);
89da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
90da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
91da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
92da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
93da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_t *t = new pthread_t[n_threads];
94da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  {
95da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    pthread_t *g_t = new pthread_t[n_garbage_threads];
96da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    for (int i = 0; i < n_garbage_threads; i++) {
97da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany      int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
98da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany      assert(status == 0);
99da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    }
100da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    for (int i = 0; i < n_threads; i++) {
101da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany      int status = pthread_create(&t[i], 0, Thread, (void*)i);
102da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany      assert(status == 0);
103da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    }
104da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    pthread_barrier_wait(&all_threads_ready);
105da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    printf("All threads started! Killing the garbage threads.\n");
106da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    for (int i = 0; i < n_garbage_threads; i++) {
107da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany      pthread_join(g_t[i], 0);
108da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    }
109da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    delete [] g_t;
110da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  }
111da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  printf("Resuming the main threads.\n");
112da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  pthread_barrier_wait(&main_threads_ready);
113da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
114da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany
115da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  for (int i = 0; i < n_threads; i++) {
116da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany    pthread_join(t[i], 0);
117da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  }
118da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  delete [] t;
119da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany  return 0;
120da4edd850db1a333c15fc3b0abc01a2e8d2f08feKostya Serebryany}
121