tsan_clock.cc revision 86277eb844c4983c81de62d7c050e92fe7155788
//===-- tsan_clock.cc -----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the number of
// threads currently in use.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The added
// complexity makes it possible to handle important classes of use cases
// in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will handle that correctly by
// switching to O(N).
//
// Thread-safety note: all const operations on SyncClocks are conducted under
// a shared lock; all non-const operations on SyncClocks are conducted under
// an exclusive lock; ThreadClocks are private to their respective threads
// and so need no protection.
//
// Description of ThreadClock state:
// clk_ - fixed-size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
//
// Description of SyncClock state:
// clk_ - variable-size vector clock, low kClkBits hold the timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is the result of a
//   release-store operation by the thread with index release_store_tid_.
// release_store_reused_ - reuse count of release_store_tid_.

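// For illustration, the element layout described above corresponds roughly to
// a bitfield split of a single u64 word (a sketch only; the authoritative
// definition lives in tsan_clock.h):
//
//   struct ClockElem {
//     u64 epoch  : kClkBits;       // low bits: thread timestamp
//     u64 reused : 64 - kClkBits;  // high bits: "acquired" flag / reuse count
//   };
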
// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

const unsigned kInvalidTid = (unsigned)-1;

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release on it.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->elem(tid_).reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->elem(tid).epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->elem(i).epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->elem(tid_).reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}
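
// Note on the fast path above: if src->elem(tid_).reused still equals this
// thread's reused_ counter, this thread has already done a full acquire of
// src since the last release-side reset of the flags, so only the (at most
// kDirtyTids) dirty entries can hold anything new; everything else is
// already incorporated into clk_.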

void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we have acquired anything from other threads
  // since the last release on dst. If not, we only need to update
  // dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = max(ce.epoch, clk_[i].epoch);
    ce.reused = 0;
  }
  // Clear the 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on the next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = clk_[i].epoch;
    ce.reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &ce = dst->elem(i);
      ce.epoch = 0;
      ce.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}
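
// Usage sketch (hypothetical variable names; the real call sites are in the
// mutex and atomics parts of the runtime): the operations above pair with
// synchronization events roughly as follows:
//
//   // On mutex unlock: publish the owner's clock into the mutex's SyncClock.
//   thr_clock.release(c, &sync_clock);
//   // On mutex lock: observe everything previous owners published.
//   thr_clock.acquire(c, &sync_clock);
//   // On an atomic release-store: overwrite the clock, don't merge into it.
//   thr_clock.ReleaseStore(c, &sync_clock);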

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->elem(tid_).epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}
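
// Worked example of the fast paths above: suppose thread 3 releases dst twice
// in a row with no intervening acquire. The first call records tid 3 in a
// free dirty_tids_ slot (StatClockReleaseFast2); the second finds tid 3
// already recorded and returns immediately (StatClockReleaseFast1). Only when
// both slots are occupied by other tids does the release fall back to the
// O(N) reset of the 'acquired' flags (StatClockReleaseSlow).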

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->elem(tid).epoch)
        return false;
    }
  }
  return true;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
      RoundUpTo(size_, ClockBlock::kClockCount)) {
    // Growing within the same block.
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (nclk <= ClockBlock::kClockCount) {
    // Grow from 0 to a one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }
  // Growing a two-level table.
  if (size_ == 0) {
    // Allocate the first-level table.
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
  } else if (size_ <= ClockBlock::kClockCount) {
    // Transform the one-level table into a two-level table.
    u32 old = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    tab_->table[0] = old;
  }
  // At this point the first-level table is allocated.
  // Add second-level tables as necessary.
  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
      i < nclk; i += ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
    tab_->table[i/ClockBlock::kClockCount] = idx;
  }
  size_ = nclk;
}
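
// To make the two-level layout concrete: once size_ exceeds
// ClockBlock::kClockCount, tab_->table holds block indices rather than
// elements, and a lookup decomposes tid the way elem() below does:
//
//   u32 block = tid / ClockBlock::kClockCount;  // slot in tab_->table
//   u32 off   = tid % ClockBlock::kClockCount;  // slot within the leaf block
//   ClockElem &e = ctx->clock_alloc.Map(tab_->table[block])->clock[off];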

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid)
    , release_store_reused_()
    , tab_()
    , tab_idx_()
    , size_() {
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

SyncClock::~SyncClock() {
  // Reset must be called before the dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_ == 0) {
    // nothing
  } else if (size_ <= ClockBlock::kClockCount) {
    // One-level table.
    ctx->clock_alloc.Free(c, tab_idx_);
  } else {
    // Two-level table.
    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
    ctx->clock_alloc.Free(c, tab_idx_);
  }
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ <= ClockBlock::kClockCount)
    return tab_->clock[tid];
  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid % ClockBlock::kClockCount];
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan