1//===-- tsan_clock.cc -----------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of ThreadSanitizer (TSan), a race detector.
11//
12//===----------------------------------------------------------------------===//
13#include "tsan_clock.h"
14#include "tsan_rtl.h"
15
16// SyncClock and ThreadClock implement vector clocks for sync variables
17// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
18// ThreadClock contains fixed-size vector clock for maximum number of threads.
19// SyncClock contains growable vector clock for currently necessary number of
20// threads.
21// Together they implement very simple model of operations, namely:
22//
23//   void ThreadClock::acquire(const SyncClock *src) {
24//     for (int i = 0; i < kMaxThreads; i++)
25//       clock[i] = max(clock[i], src->clock[i]);
26//   }
27//
28//   void ThreadClock::release(SyncClock *dst) const {
29//     for (int i = 0; i < kMaxThreads; i++)
30//       dst->clock[i] = max(dst->clock[i], clock[i]);
31//   }
32//
33//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
34//     for (int i = 0; i < kMaxThreads; i++)
35//       dst->clock[i] = clock[i];
36//   }
37//
38//   void ThreadClock::acq_rel(SyncClock *dst) {
39//     acquire(dst);
40//     release(dst);
41//   }
42//
43// Conformance to this model is extensively verified in tsan_clock_test.cc.
44// However, the implementation is significantly more complex. The complexity
45// allows to implement important classes of use cases in O(1) instead of O(N).
46//
47// The use cases are:
48// 1. Singleton/once atomic that has a single release-store operation followed
49//    by zillions of acquire-loads (the acquire-load is O(1)).
50// 2. Thread-local mutex (both lock and unlock can be O(1)).
51// 3. Leaf mutex (unlock is O(1)).
52// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
53// 5. An atomic with a single writer (writes can be O(1)).
54// The implementation dynamically adopts to workload. So if an atomic is in
55// read-only phase, these reads will be O(1); if it later switches to read/write
56// phase, the implementation will correctly handle that by switching to O(N).
57//
58// Thread-safety note: all const operations on SyncClock's are conducted under
59// a shared lock; all non-const operations on SyncClock's are conducted under
60// an exclusive lock; ThreadClock's are private to respective threads and so
61// do not need any protection.
62//
63// Description of ThreadClock state:
64// clk_ - fixed size vector clock.
65// nclk_ - effective size of the vector clock (the rest is zeros).
66// tid_ - index of the thread associated with he clock ("current thread").
67// last_acquire_ - current thread time when it acquired something from
68//   other threads.
69//
70// Description of SyncClock state:
71// clk_ - variable size vector clock, low kClkBits hold timestamp,
72//   the remaining bits hold "acquired" flag (the actual value is thread's
73//   reused counter);
74//   if acquried == thr->reused_, then the respective thread has already
75//   acquired this clock (except possibly dirty_tids_).
76// dirty_tids_ - holds up to two indeces in the vector clock that other threads
77//   need to acquire regardless of "acquired" flag value;
78// release_store_tid_ - denotes that the clock state is a result of
79//   release-store operation by the thread with release_store_tid_ index.
80// release_store_reused_ - reuse count of release_store_tid_.
81
82// We don't have ThreadState in these methods, so this is an ugly hack that
83// works only in C++.
84#ifndef TSAN_GO
85# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
86#else
87# define CPP_STAT_INC(typ) (void)0
88#endif
89
90namespace __tsan {
91
92const unsigned kInvalidTid = (unsigned)-1;
93
94ThreadClock::ThreadClock(unsigned tid, unsigned reused)
95    : tid_(tid)
96    , reused_(reused + 1) {  // 0 has special meaning
97  CHECK_LT(tid, kMaxTidInClock);
98  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
99  nclk_ = tid_ + 1;
100  last_acquire_ = 0;
101  internal_memset(clk_, 0, sizeof(clk_));
102  clk_[tid_].reused = reused_;
103}
104
105void ThreadClock::acquire(const SyncClock *src) {
106  DCHECK(nclk_ <= kMaxTid);
107  DCHECK(src->clk_.Size() <= kMaxTid);
108  CPP_STAT_INC(StatClockAcquire);
109
110  // Check if it's empty -> no need to do anything.
111  const uptr nclk = src->clk_.Size();
112  if (nclk == 0) {
113    CPP_STAT_INC(StatClockAcquireEmpty);
114    return;
115  }
116
117  // Check if we've already acquired src after the last release operation on src
118  bool acquired = false;
119  if (nclk > tid_) {
120    CPP_STAT_INC(StatClockAcquireLarge);
121    if (src->clk_[tid_].reused == reused_) {
122      CPP_STAT_INC(StatClockAcquireRepeat);
123      for (unsigned i = 0; i < kDirtyTids; i++) {
124        unsigned tid = src->dirty_tids_[i];
125        if (tid != kInvalidTid) {
126          u64 epoch = src->clk_[tid].epoch;
127          if (clk_[tid].epoch < epoch) {
128            clk_[tid].epoch = epoch;
129            acquired = true;
130          }
131        }
132      }
133      if (acquired) {
134        CPP_STAT_INC(StatClockAcquiredSomething);
135        last_acquire_ = clk_[tid_].epoch;
136      }
137      return;
138    }
139  }
140
141  // O(N) acquire.
142  CPP_STAT_INC(StatClockAcquireFull);
143  nclk_ = max(nclk_, nclk);
144  for (uptr i = 0; i < nclk; i++) {
145    u64 epoch = src->clk_[i].epoch;
146    if (clk_[i].epoch < epoch) {
147      clk_[i].epoch = epoch;
148      acquired = true;
149    }
150  }
151
152  // Remember that this thread has acquired this clock.
153  if (nclk > tid_)
154    src->clk_[tid_].reused = reused_;
155
156  if (acquired) {
157    CPP_STAT_INC(StatClockAcquiredSomething);
158    last_acquire_ = clk_[tid_].epoch;
159  }
160}
161
162void ThreadClock::release(SyncClock *dst) const {
163  DCHECK_LE(nclk_, kMaxTid);
164  DCHECK_LE(dst->clk_.Size(), kMaxTid);
165
166  if (dst->clk_.Size() == 0) {
167    // ReleaseStore will correctly set release_store_tid_,
168    // which can be important for future operations.
169    ReleaseStore(dst);
170    return;
171  }
172
173  CPP_STAT_INC(StatClockRelease);
174  // Check if we need to resize dst.
175  if (dst->clk_.Size() < nclk_) {
176    CPP_STAT_INC(StatClockReleaseResize);
177    dst->clk_.Resize(nclk_);
178  }
179
180  // Check if we had not acquired anything from other threads
181  // since the last release on dst. If so, we need to update
182  // only dst->clk_[tid_].
183  if (dst->clk_[tid_].epoch > last_acquire_) {
184    UpdateCurrentThread(dst);
185    if (dst->release_store_tid_ != tid_ ||
186        dst->release_store_reused_ != reused_)
187      dst->release_store_tid_ = kInvalidTid;
188    return;
189  }
190
191  // O(N) release.
192  CPP_STAT_INC(StatClockReleaseFull);
193  // First, remember whether we've acquired dst.
194  bool acquired = IsAlreadyAcquired(dst);
195  if (acquired)
196    CPP_STAT_INC(StatClockReleaseAcquired);
197  // Update dst->clk_.
198  for (uptr i = 0; i < nclk_; i++) {
199    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
200    dst->clk_[i].reused = 0;
201  }
202  // Clear 'acquired' flag in the remaining elements.
203  if (nclk_ < dst->clk_.Size())
204    CPP_STAT_INC(StatClockReleaseClearTail);
205  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
206    dst->clk_[i].reused = 0;
207  for (unsigned i = 0; i < kDirtyTids; i++)
208    dst->dirty_tids_[i] = kInvalidTid;
209  dst->release_store_tid_ = kInvalidTid;
210  dst->release_store_reused_ = 0;
211  // If we've acquired dst, remember this fact,
212  // so that we don't need to acquire it on next acquire.
213  if (acquired)
214    dst->clk_[tid_].reused = reused_;
215}
216
217void ThreadClock::ReleaseStore(SyncClock *dst) const {
218  DCHECK(nclk_ <= kMaxTid);
219  DCHECK(dst->clk_.Size() <= kMaxTid);
220  CPP_STAT_INC(StatClockStore);
221
222  // Check if we need to resize dst.
223  if (dst->clk_.Size() < nclk_) {
224    CPP_STAT_INC(StatClockStoreResize);
225    dst->clk_.Resize(nclk_);
226  }
227
228  if (dst->release_store_tid_ == tid_ &&
229      dst->release_store_reused_ == reused_ &&
230      dst->clk_[tid_].epoch > last_acquire_) {
231    CPP_STAT_INC(StatClockStoreFast);
232    UpdateCurrentThread(dst);
233    return;
234  }
235
236  // O(N) release-store.
237  CPP_STAT_INC(StatClockStoreFull);
238  for (uptr i = 0; i < nclk_; i++) {
239    dst->clk_[i].epoch = clk_[i].epoch;
240    dst->clk_[i].reused = 0;
241  }
242  // Clear the tail of dst->clk_.
243  if (nclk_ < dst->clk_.Size()) {
244    internal_memset(&dst->clk_[nclk_], 0,
245        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
246    CPP_STAT_INC(StatClockStoreTail);
247  }
248  for (unsigned i = 0; i < kDirtyTids; i++)
249    dst->dirty_tids_[i] = kInvalidTid;
250  dst->release_store_tid_ = tid_;
251  dst->release_store_reused_ = reused_;
252  // Rememeber that we don't need to acquire it in future.
253  dst->clk_[tid_].reused = reused_;
254}
255
256void ThreadClock::acq_rel(SyncClock *dst) {
257  CPP_STAT_INC(StatClockAcquireRelease);
258  acquire(dst);
259  ReleaseStore(dst);
260}
261
262// Updates only single element related to the current thread in dst->clk_.
263void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
264  // Update the threads time, but preserve 'acquired' flag.
265  dst->clk_[tid_].epoch = clk_[tid_].epoch;
266
267  for (unsigned i = 0; i < kDirtyTids; i++) {
268    if (dst->dirty_tids_[i] == tid_) {
269      CPP_STAT_INC(StatClockReleaseFast1);
270      return;
271    }
272    if (dst->dirty_tids_[i] == kInvalidTid) {
273      CPP_STAT_INC(StatClockReleaseFast2);
274      dst->dirty_tids_[i] = tid_;
275      return;
276    }
277  }
278  // Reset all 'acquired' flags, O(N).
279  CPP_STAT_INC(StatClockReleaseSlow);
280  for (uptr i = 0; i < dst->clk_.Size(); i++) {
281    dst->clk_[i].reused = 0;
282  }
283  for (unsigned i = 0; i < kDirtyTids; i++)
284    dst->dirty_tids_[i] = kInvalidTid;
285}
286
287// Checks whether the current threads has already acquired src.
288bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
289  if (src->clk_[tid_].reused != reused_)
290    return false;
291  for (unsigned i = 0; i < kDirtyTids; i++) {
292    unsigned tid = src->dirty_tids_[i];
293    if (tid != kInvalidTid) {
294      if (clk_[tid].epoch < src->clk_[tid].epoch)
295        return false;
296    }
297  }
298  return true;
299}
300
301// Sets a single element in the vector clock.
302// This function is called only from weird places like AcquireGlobal.
303void ThreadClock::set(unsigned tid, u64 v) {
304  DCHECK_LT(tid, kMaxTid);
305  DCHECK_GE(v, clk_[tid].epoch);
306  clk_[tid].epoch = v;
307  if (nclk_ <= tid)
308    nclk_ = tid + 1;
309  last_acquire_ = clk_[tid_].epoch;
310}
311
312void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
313  printf("clock=[");
314  for (uptr i = 0; i < nclk_; i++)
315    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
316  printf("] reused=[");
317  for (uptr i = 0; i < nclk_; i++)
318    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
319  printf("] tid=%u/%u last_acq=%llu",
320      tid_, reused_, last_acquire_);
321}
322
323SyncClock::SyncClock()
324    : clk_(MBlockClock) {
325  release_store_tid_ = kInvalidTid;
326  release_store_reused_ = 0;
327  for (uptr i = 0; i < kDirtyTids; i++)
328    dirty_tids_[i] = kInvalidTid;
329}
330
331void SyncClock::Reset() {
332  clk_.Reset();
333  Zero();
334}
335
336void SyncClock::Zero() {
337  clk_.Resize(0);
338  release_store_tid_ = kInvalidTid;
339  release_store_reused_ = 0;
340  for (uptr i = 0; i < kDirtyTids; i++)
341    dirty_tids_[i] = kInvalidTid;
342}
343
344void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
345  printf("clock=[");
346  for (uptr i = 0; i < clk_.Size(); i++)
347    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
348  printf("] reused=[");
349  for (uptr i = 0; i < clk_.Size(); i++)
350    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
351  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
352      release_store_tid_, release_store_reused_,
353      dirty_tids_[0], dirty_tids_[1]);
354}
355}  // namespace __tsan
356