tsan_clock.cc revision 6d1862363c88c183b0ed7740fca876342cf0474b
1//===-- tsan_clock.cc -----------------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is a part of ThreadSanitizer (TSan), a race detector. 11// 12//===----------------------------------------------------------------------===// 13#include "tsan_clock.h" 14#include "tsan_rtl.h" 15#include "sanitizer_common/sanitizer_placement_new.h" 16 17// SyncClock and ThreadClock implement vector clocks for sync variables 18// (mutexes, atomic variables, file descriptors, etc) and threads, respectively. 19// ThreadClock contains fixed-size vector clock for maximum number of threads. 20// SyncClock contains growable vector clock for currently necessary number of 21// threads. 22// Together they implement very simple model of operations, namely: 23// 24// void ThreadClock::acquire(const SyncClock *src) { 25// for (int i = 0; i < kMaxThreads; i++) 26// clock[i] = max(clock[i], src->clock[i]); 27// } 28// 29// void ThreadClock::release(SyncClock *dst) const { 30// for (int i = 0; i < kMaxThreads; i++) 31// dst->clock[i] = max(dst->clock[i], clock[i]); 32// } 33// 34// void ThreadClock::ReleaseStore(SyncClock *dst) const { 35// for (int i = 0; i < kMaxThreads; i++) 36// dst->clock[i] = clock[i]; 37// } 38// 39// void ThreadClock::acq_rel(SyncClock *dst) { 40// acquire(dst); 41// release(dst); 42// } 43// 44// Conformance to this model is extensively verified in tsan_clock_test.cc. 45// However, the implementation is significantly more complex. The complexity 46// allows to implement important classes of use cases in O(1) instead of O(N). 47// 48// The use cases are: 49// 1. Singleton/once atomic that has a single release-store operation followed 50// by zillions of acquire-loads (the acquire-load is O(1)). 51// 2. 
// Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to workload. So if an atomic is in
// read-only phase, these reads will be O(1); if it later switches to read/write
// phase, the implementation will correctly handle that by switching to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef TSAN_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

// Sentinel for "no thread": used in dirty_tids_ and release_store_tid_.
const unsigned kInvalidTid = (unsigned)-1;

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  // reused_ is packed into the high bits above kClkBits in SyncClock
  // elements, so it must survive a round-trip through that truncation.
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

// Acquire: clk_[i] = max(clk_[i], src->elem(i).epoch) for all i.
// Fast paths: empty src (O(1)), and repeat-acquire when src already carries
// our 'reused' stamp -- then only the dirty_tids_ entries can be newer (O(1)).
void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(src->size_ <= kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation on src
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->elem(tid_).reused == reused_) {
      // Everything except the dirty entries was acquired before;
      // merge in only the (up to kDirtyTids) dirty elements.
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->elem(tid).epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->elem(i).epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  // NOTE(review): this mutates *src through a const pointer (elem() is a
  // const method returning a mutable ref); per the thread-safety note at the
  // top of the file this runs under a shared lock -- each thread writes only
  // its own element, but verify this against the locking discipline.
  if (nclk > tid_)
    src->elem(tid_).reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}

// Release: dst->elem(i).epoch = max(dst->elem(i).epoch, clk_[i].epoch).
// Fast path: if we haven't acquired anything since our last release into dst,
// only our own element can have advanced, so update just that one.
void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    // A plain release invalidates any prior release-store marking,
    // unless it was our own release-store (same tid and reuse count).
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = max(ce.epoch, clk_[i].epoch);
    ce.reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

// Release-store: dst->elem(i).epoch = clk_[i].epoch (overwrite, not merge).
// Fast path: a repeated release-store by the same thread (same tid and reuse
// count) with no intervening acquires only needs to bump our own element.
void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(dst->size_ <= kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = clk_[i].epoch;
    ce.reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &ce = dst->elem(i);
      ce.epoch = 0;
      ce.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;
}

// Combined acquire + release-store (e.g. for read-modify-write atomics).
void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the threads time, but preserve 'acquired' flag.
  dst->elem(tid_).epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    // Already marked dirty by us -- nothing more to record.
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    // Free dirty slot -- record our tid there.
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // No free dirty slot: force everybody to re-acquire the full clock.
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}

// Checks whether the current thread has already acquired src
// (its reuse stamp matches and all dirty entries are already covered).
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->elem(tid).epoch)
        return false;
    }
  }
  return true;
}

// Grows the clock to hold at least nclk elements.
// Storage is either a single ClockBlock of elements (one-level, up to
// kClockCount tids) or a ClockBlock of indices to element blocks (two-level).
void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
      RoundUpTo(size_, ClockBlock::kClockCount)) {
    // Growing within the same block.
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (nclk <= ClockBlock::kClockCount) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }
  // Growing two-level table.
  if (size_ == 0) {
    // Allocate first level table.
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
  } else if (size_ <= ClockBlock::kClockCount) {
    // Transform one-level table to two-level table.
    // The old block becomes the first second-level block.
    u32 old = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    tab_->table[0] = old;
  }
  // At this point we have first level table allocated.
  // Add second level tables as necessary.
  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
      i < nclk; i += ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
    tab_->table[i/ClockBlock::kClockCount] = idx;
  }
  size_ = nclk;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  // Treat the write as an acquire so the next release takes the full path.
  last_acquire_ = clk_[tid_].epoch;
}

// Prints the clock state via the supplied printf-like callback (no newline).
void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid)
    , release_store_reused_()
    , tab_()
    , tab_idx_()
    , size_() {
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor
  // (the dtor has no ClockCache to return the blocks to).
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

// Returns all clock blocks to the cache and resets to the empty state.
void SyncClock::Reset(ClockCache *c) {
  if (size_ == 0) {
    // nothing
  } else if (size_ <= ClockBlock::kClockCount) {
    // One-level table.
    ctx->clock_alloc.Free(c, tab_idx_);
  } else {
    // Two-level table.
    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
    ctx->clock_alloc.Free(c, tab_idx_);
  }
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

// Returns the element for tid, indirecting through the second level
// of the table when the clock has outgrown a single block.
ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ <= ClockBlock::kClockCount)
    return tab_->clock[tid];
  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid % ClockBlock::kClockCount];
}

// Prints the clock state via the supplied printf-like callback (no newline).
void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan