//===-- tsan_rtl.cc -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Main file (entry points) for the TSan run-time.
//===----------------------------------------------------------------------===//

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_symbolizer.h"
#include "tsan_defs.h"
#include "tsan_platform.h"
#include "tsan_rtl.h"
#include "tsan_mman.h"
#include "tsan_suppressions.h"
#include "tsan_symbolize.h"
#include "ubsan/ubsan_init.h"

#ifdef __SSE3__
// <emmintrin.h> transitively includes <stdlib.h>,
// and it's prohibited to include std headers into tsan runtime.
// So we do this dirty trick.
#define _MM_MALLOC_H_INCLUDED
#define __MM_MALLOC_H
#include <emmintrin.h>
typedef __m128i m128;
#endif

volatile int __tsan_resumed = 0;

extern "C" void __tsan_resume() {
  __tsan_resumed = 1;
}

namespace __tsan {

#if !defined(SANITIZER_GO) && !SANITIZER_MAC
THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
#endif
static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
Context *ctx;

// Can be overridden by a front-end.
#ifdef TSAN_EXTERNAL_HOOKS
bool OnFinalize(bool failed);
void OnInitialize();
#else
SANITIZER_WEAK_CXX_DEFAULT_IMPL
bool OnFinalize(bool failed) {
  return failed;
}
SANITIZER_WEAK_CXX_DEFAULT_IMPL
void OnInitialize() {}
#endif

static char thread_registry_placeholder[sizeof(ThreadRegistry)];

static ThreadContextBase *CreateThreadContext(u32 tid) {
  // Map thread trace when context is created.
  char name[50];
  internal_snprintf(name, sizeof(name), "trace %u", tid);
  MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name);
  const uptr hdr = GetThreadTraceHeader(tid);
  internal_snprintf(name, sizeof(name), "trace header %u", tid);
  MapThreadTrace(hdr, sizeof(Trace), name);
  new((void*)hdr) Trace();
  // We are going to use only a small part of the trace with the default
  // value of history_size. However, the constructor writes to the whole trace.
  // Unmap the unused part.
  uptr hdr_end = hdr + sizeof(Trace);
  hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts());
  hdr_end = RoundUp(hdr_end, GetPageSizeCached());
  if (hdr_end < hdr + sizeof(Trace))
    UnmapOrDie((void*)hdr_end, hdr + sizeof(Trace) - hdr_end);
  void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext));
  return new(mem) ThreadContext(tid);
}
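
// CreateThreadContext is the factory callback handed to the ThreadRegistry
// below; it runs when the registry creates the context for a new tid, which
// is also when that thread's trace memory gets mapped. Finished contexts are
// kept in a small quarantine (kThreadQuarantineSize) before a tid is reused.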
#ifndef SANITIZER_GO
static const u32 kThreadQuarantineSize = 16;
#else
static const u32 kThreadQuarantineSize = 64;
#endif

Context::Context()
  : initialized()
  , report_mtx(MutexTypeReport, StatMtxReport)
  , nreported()
  , nmissed_expected()
  , thread_registry(new(thread_registry_placeholder) ThreadRegistry(
      CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse))
  , racy_mtx(MutexTypeRacy, StatMtxRacy)
  , racy_stacks(MBlockRacyStacks)
  , racy_addresses(MBlockRacyAddresses)
  , fired_suppressions_mtx(MutexTypeFired, StatMtxFired)
  , fired_suppressions(8) {
}

// The objects are allocated in TLS, so one may rely on zero-initialization.
ThreadState::ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
                         unsigned reuse_count,
                         uptr stk_addr, uptr stk_size,
                         uptr tls_addr, uptr tls_size)
  : fast_state(tid, epoch)
  // Do not touch these, rely on zero initialization,
  // they may be accessed before the ctor.
  // , ignore_reads_and_writes()
  // , ignore_interceptors()
  , clock(tid, reuse_count)
#ifndef SANITIZER_GO
  , jmp_bufs(MBlockJmpBuf)
#endif
  , tid(tid)
  , unique_id(unique_id)
  , stk_addr(stk_addr)
  , stk_size(stk_size)
  , tls_addr(tls_addr)
  , tls_size(tls_size)
#ifndef SANITIZER_GO
  , last_sleep_clock(tid)
#endif
{
}

#ifndef SANITIZER_GO
static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
  uptr n_threads;
  uptr n_running_threads;
  ctx->thread_registry->GetNumberOfThreads(&n_threads, &n_running_threads);
  InternalScopedBuffer<char> buf(4096);
  WriteMemoryProfile(buf.data(), buf.size(), n_threads, n_running_threads);
  WriteToFile(fd, buf.data(), internal_strlen(buf.data()));
}
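
// The background thread wakes up every 100ms and performs the periodic
// housekeeping configured via flags: shadow memory flushing, RSS-based
// flushing, memory profiling and symbolizer cache flushing.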
static void BackgroundThread(void *arg) {
  // This is a non-initialized non-user thread, nothing to see here.
  // We don't use ScopedIgnoreInterceptors, because we want ignores to be
  // enabled even when the thread function exits (e.g. during pthread thread
  // shutdown code).
  cur_thread()->ignore_interceptors++;
  const u64 kMs2Ns = 1000 * 1000;

  fd_t mprof_fd = kInvalidFd;
  if (flags()->profile_memory && flags()->profile_memory[0]) {
    if (internal_strcmp(flags()->profile_memory, "stdout") == 0) {
      mprof_fd = 1;
    } else if (internal_strcmp(flags()->profile_memory, "stderr") == 0) {
      mprof_fd = 2;
    } else {
      InternalScopedString filename(kMaxPathLength);
      filename.append("%s.%d", flags()->profile_memory, (int)internal_getpid());
      fd_t fd = OpenFile(filename.data(), WrOnly);
      if (fd == kInvalidFd) {
        Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
            &filename[0]);
      } else {
        mprof_fd = fd;
      }
    }
  }

  u64 last_flush = NanoTime();
  uptr last_rss = 0;
  for (int i = 0;
      atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
      i++) {
    SleepForMillis(100);
    u64 now = NanoTime();

    // Flush memory if requested.
    if (flags()->flush_memory_ms > 0) {
      if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
        VPrintf(1, "ThreadSanitizer: periodic memory flush\n");
        FlushShadowMemory();
        last_flush = NanoTime();
      }
    }
    // GetRSS can be expensive on huge programs, so don't do it every 100ms.
    if (flags()->memory_limit_mb > 0) {
      uptr rss = GetRSS();
      uptr limit = uptr(flags()->memory_limit_mb) << 20;
      VPrintf(1, "ThreadSanitizer: memory flush check"
                 " RSS=%llu LAST=%llu LIMIT=%llu\n",
              (u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20);
      if (2 * rss > limit + last_rss) {
        VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n");
        FlushShadowMemory();
        rss = GetRSS();
        VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
      }
      last_rss = rss;
    }

    // Write memory profile if requested.
    if (mprof_fd != kInvalidFd)
      MemoryProfiler(ctx, mprof_fd, i);

    // Flush symbolizer cache if requested.
    if (flags()->flush_symbolizer_ms > 0) {
      u64 last = atomic_load(&ctx->last_symbolize_time_ns,
                             memory_order_relaxed);
      if (last != 0 && last + flags()->flush_symbolizer_ms * kMs2Ns < now) {
        Lock l(&ctx->report_mtx);
        SpinMutexLock l2(&CommonSanitizerReportMutex);
        SymbolizeFlush();
        atomic_store(&ctx->last_symbolize_time_ns, 0, memory_order_relaxed);
      }
    }
  }
}

static void StartBackgroundThread() {
  ctx->background_thread = internal_start_thread(&BackgroundThread, 0);
}

#ifndef __mips__
static void StopBackgroundThread() {
  atomic_store(&ctx->stop_background_thread, 1, memory_order_relaxed);
  internal_join_thread(ctx->background_thread);
  ctx->background_thread = 0;
}
#endif
#endif

void DontNeedShadowFor(uptr addr, uptr size) {
  uptr shadow_beg = MemToShadow(addr);
  uptr shadow_end = MemToShadow(addr + size);
  FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
}
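
// MapShadow reserves both the regular shadow (kShadowMultiplier bytes of
// shadow per application byte, see tsan_defs.h) and the meta shadow for the
// given application range. It is called for data+bss first and then
// incrementally as the heap grows.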
void MapShadow(uptr addr, uptr size) {
  // Global data is not 64K aligned, but there are no adjacent mappings,
  // so we can get away with unaligned mapping.
  // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier, "shadow");

  // Meta shadow is 2:1, so tread carefully.
  static bool data_mapped = false;
  static uptr mapped_meta_end = 0;
  uptr meta_begin = (uptr)MemToMeta(addr);
  uptr meta_end = (uptr)MemToMeta(addr + size);
  meta_begin = RoundDownTo(meta_begin, 64 << 10);
  meta_end = RoundUpTo(meta_end, 64 << 10);
  if (!data_mapped) {
    // First call maps data+bss.
    data_mapped = true;
    MmapFixedNoReserve(meta_begin, meta_end - meta_begin, "meta shadow");
  } else {
    // Mapping continuous heap.
    // Windows wants 64K alignment.
    meta_begin = RoundDownTo(meta_begin, 64 << 10);
    meta_end = RoundUpTo(meta_end, 64 << 10);
    if (meta_end <= mapped_meta_end)
      return;
    if (meta_begin < mapped_meta_end)
      meta_begin = mapped_meta_end;
    MmapFixedNoReserve(meta_begin, meta_end - meta_begin, "meta shadow");
    mapped_meta_end = meta_end;
  }
  VPrintf(2, "mapped meta shadow for (%p-%p) at (%p-%p)\n",
      addr, addr+size, meta_begin, meta_end);
}

void MapThreadTrace(uptr addr, uptr size, const char *name) {
  DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
  CHECK_GE(addr, TraceMemBeg());
  CHECK_LE(addr + size, TraceMemEnd());
  CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  uptr addr1 = (uptr)MmapFixedNoReserve(addr, size, name);
  if (addr1 != addr) {
    Printf("FATAL: ThreadSanitizer can not mmap thread trace (%p/%p->%p)\n",
        addr, size, addr1);
    Die();
  }
}

static void CheckShadowMapping() {
  uptr beg, end;
  for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
    VPrintf(3, "checking shadow region %p-%p\n", beg, end);
    for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
      for (int x = -1; x <= 1; x++) {
        const uptr p = p0 + x;
        if (p < beg || p >= end)
          continue;
        const uptr s = MemToShadow(p);
        const uptr m = (uptr)MemToMeta(p);
        VPrintf(3, "  checking pointer %p: shadow=%p meta=%p\n", p, s, m);
        CHECK(IsAppMem(p));
        CHECK(IsShadowMem(s));
        CHECK_EQ(p & ~(kShadowCell - 1), ShadowToMem(s));
        CHECK(IsMetaMem(m));
      }
    }
  }
}
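
// Initialize() runs once (guarded by is_initialized below) and is typically
// reached from __tsan_init() (see tsan_interface.cc) very early during
// process startup, before user threads exist.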
void Initialize(ThreadState *thr) {
  // Thread safe because done before all threads exist.
  static bool is_initialized = false;
  if (is_initialized)
    return;
  is_initialized = true;
  // We are not ready to handle interceptors yet.
  ScopedIgnoreInterceptors ignore;
  SanitizerToolName = "ThreadSanitizer";
  // Install tool-specific callbacks in sanitizer_common.
  SetCheckFailedCallback(TsanCheckFailed);

  ctx = new(ctx_placeholder) Context;
  const char *options = GetEnv(kTsanOptionsEnv);
  CacheBinaryName();
  InitializeFlags(&ctx->flags, options);
  InitializePlatformEarly();
#ifndef SANITIZER_GO
  // Re-exec ourselves if we need to set additional env or command line args.
  MaybeReexec();

  InitializeAllocator();
  ReplaceSystemMalloc();
#endif
  InitializeInterceptors();
  CheckShadowMapping();
  InitializePlatform();
  InitializeMutex();
  InitializeDynamicAnnotations();
#ifndef SANITIZER_GO
  InitializeShadowMemory();
#endif
  // Set up the correct file descriptor for error reports.
  __sanitizer_set_report_path(common_flags()->log_path);
  InitializeSuppressions();
#ifndef SANITIZER_GO
  InitializeLibIgnore();
  Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
  // On MIPS, TSan initialization is run before
  // __pthread_initialize_minimal_internal() is finished, so we can not spawn
  // new threads.
#ifndef __mips__
  StartBackgroundThread();
  SetSandboxingCallback(StopBackgroundThread);
#endif
#endif
  if (common_flags()->detect_deadlocks)
    ctx->dd = DDetector::Create(flags());

  VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
          (int)internal_getpid());

  // Initialize thread 0.
  int tid = ThreadCreate(thr, 0, 0, true);
  CHECK_EQ(tid, 0);
  ThreadStart(thr, tid, internal_getpid());
#if TSAN_CONTAINS_UBSAN
  __ubsan::InitAsPlugin();
#endif
  ctx->initialized = true;

  if (flags()->stop_on_start) {
    Printf("ThreadSanitizer is suspended at startup (pid %d)."
           " Call __tsan_resume().\n",
           (int)internal_getpid());
    while (__tsan_resumed == 0) {}
  }

  OnInitialize();
}

int Finalize(ThreadState *thr) {
  bool failed = false;

  if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
    SleepForMillis(flags()->atexit_sleep_ms);

  // Wait for pending reports.
  ctx->report_mtx.Lock();
  CommonSanitizerReportMutex.Lock();
  CommonSanitizerReportMutex.Unlock();
  ctx->report_mtx.Unlock();

#ifndef SANITIZER_GO
  if (Verbosity()) AllocatorPrintStats();
#endif

  ThreadFinalize(thr);

  if (ctx->nreported) {
    failed = true;
#ifndef SANITIZER_GO
    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
#else
    Printf("Found %d data race(s)\n", ctx->nreported);
#endif
  }

  if (ctx->nmissed_expected) {
    failed = true;
    Printf("ThreadSanitizer: missed %d expected races\n",
        ctx->nmissed_expected);
  }

  if (common_flags()->print_suppressions)
    PrintMatchedSuppressions();
#ifndef SANITIZER_GO
  if (flags()->print_benign)
    PrintMatchedBenignRaces();
#endif

  failed = OnFinalize(failed);

#if TSAN_COLLECT_STATS
  StatAggregate(ctx->stat, thr->stat);
  StatOutput(ctx->stat);
#endif

  return failed ? common_flags()->exitcode : 0;
}

#ifndef SANITIZER_GO
void ForkBefore(ThreadState *thr, uptr pc) {
  ctx->thread_registry->Lock();
  ctx->report_mtx.Lock();
}

void ForkParentAfter(ThreadState *thr, uptr pc) {
  ctx->report_mtx.Unlock();
  ctx->thread_registry->Unlock();
}

void ForkChildAfter(ThreadState *thr, uptr pc) {
  ctx->report_mtx.Unlock();
  ctx->thread_registry->Unlock();

  uptr nthread = 0;
  ctx->thread_registry->GetNumberOfThreads(0, 0, &nthread /* alive threads */);
  VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
      " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
  if (nthread == 1) {
    StartBackgroundThread();
  } else {
    // We've just forked a multi-threaded process. We cannot reasonably function
    // after that (some mutexes may be locked before fork). So just enable
    // ignores for everything in the hope that we will exec soon.
    ctx->after_multithreaded_fork = true;
    thr->ignore_interceptors++;
    ThreadIgnoreBegin(thr, pc);
    ThreadIgnoreSyncBegin(thr, pc);
  }
}
#endif

#ifdef SANITIZER_GO
NOINLINE
void GrowShadowStack(ThreadState *thr) {
  const int sz = thr->shadow_stack_end - thr->shadow_stack;
  const int newsz = 2 * sz;
  uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
      newsz * sizeof(uptr));
  internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
  internal_free(thr->shadow_stack);
  thr->shadow_stack = newstack;
  thr->shadow_stack_pos = newstack + sz;
  thr->shadow_stack_end = newstack + newsz;
}
#endif
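
// CurrentStackId interns the thread's current shadow (call) stack, optionally
// with pc pushed on top, into the StackDepot and returns the resulting id.
// Callers such as the ignore sets below store these compact ids instead of
// full stack traces.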
u32 CurrentStackId(ThreadState *thr, uptr pc) {
  if (!thr->is_inited)  // May happen during bootstrap.
    return 0;
  if (pc != 0) {
#ifndef SANITIZER_GO
    DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
    if (thr->shadow_stack_pos == thr->shadow_stack_end)
      GrowShadowStack(thr);
#endif
    thr->shadow_stack_pos[0] = pc;
    thr->shadow_stack_pos++;
  }
  u32 id = StackDepotPut(
      StackTrace(thr->shadow_stack, thr->shadow_stack_pos - thr->shadow_stack));
  if (pc != 0)
    thr->shadow_stack_pos--;
  return id;
}

void TraceSwitch(ThreadState *thr) {
  thr->nomalloc++;
  Trace *thr_trace = ThreadTrace(thr->tid);
  Lock l(&thr_trace->mtx);
  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
  TraceHeader *hdr = &thr_trace->headers[trace];
  hdr->epoch0 = thr->fast_state.epoch();
  ObtainCurrentStack(thr, 0, &hdr->stack0);
  hdr->mset0 = thr->mset;
  thr->nomalloc--;
}

Trace *ThreadTrace(int tid) {
  return (Trace*)GetThreadTraceHeader(tid);
}

uptr TraceTopPC(ThreadState *thr) {
  Event *events = (Event*)GetThreadTrace(thr->tid);
  uptr pc = events[thr->fast_state.GetTracePos()];
  return pc;
}

uptr TraceSize() {
  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
}

uptr TraceParts() {
  return TraceSize() / kTracePartSize;
}

#ifndef SANITIZER_GO
extern "C" void __tsan_trace_switch() {
  TraceSwitch(cur_thread());
}

extern "C" void __tsan_report_race() {
  ReportRace(cur_thread());
}
#endif

ALWAYS_INLINE
Shadow LoadShadow(u64 *p) {
  u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
  return Shadow(raw);
}

ALWAYS_INLINE
void StoreShadow(u64 *sp, u64 s) {
  atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
}

ALWAYS_INLINE
void StoreIfNotYetStored(u64 *sp, u64 *s) {
  StoreShadow(sp, *s);
  *s = 0;
}

ALWAYS_INLINE
void HandleRace(ThreadState *thr, u64 *shadow_mem,
                Shadow cur, Shadow old) {
  thr->racy_state[0] = cur.raw();
  thr->racy_state[1] = old.raw();
  thr->racy_shadow_addr = shadow_mem;
#ifndef SANITIZER_GO
  HACKY_CALL(__tsan_report_race);
#else
  ReportRace(thr);
#endif
}

static inline bool HappensBefore(Shadow old, ThreadState *thr) {
  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}
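
// Each aligned 8-byte application word is backed by kShadowCnt shadow slots.
// A slot roughly encodes the accessing thread id, its epoch, the offset/size
// of the access within the word and the access type (see the Shadow class in
// tsan_rtl.h). MemoryAccessImpl1 below compares the current access against
// every slot and either leaves the slot alone, overwrites a slot with the
// current access, or jumps to RACE: when it finds a conflicting access that
// is not ordered by happens-before.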
ALWAYS_INLINE
void MemoryAccessImpl1(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  StatInc(thr, StatMop);
  StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
  StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));

  // This potentially can live in an MMX/SSE scratch register.
  // The required intrinsics are:
  // __m128i _mm_move_epi64(__m128i*);
  // _mm_storel_epi64(u64*, __m128i);
  u64 store_word = cur.raw();

  // scan all the shadow values and dispatch to 4 categories:
  // same, replace, candidate and race (see comments below).
  // we consider only 3 cases regarding access sizes:
  // equal, intersect and not intersect. initially I considered
  // larger and smaller as well, it allowed replacing some
  // 'candidates' with 'same' or 'replace', but I think
  // it's just not worth it (performance- and complexity-wise).

  Shadow old(0);

  // In release mode we manually unroll the loop,
  // because empirically gcc generates better code this way.
  // However, we can't afford unrolling in debug mode, because the function
  // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
  // threads, which is not enough for the unrolled loop.
#if SANITIZER_DEBUG
  for (int idx = 0; idx < 4; idx++) {
#include "tsan_update_shadow_word_inl.h"
  }
#else
  int idx = 0;
#include "tsan_update_shadow_word_inl.h"
  idx = 1;
#include "tsan_update_shadow_word_inl.h"
  idx = 2;
#include "tsan_update_shadow_word_inl.h"
  idx = 3;
#include "tsan_update_shadow_word_inl.h"
#endif

  // we did not find any races and had already stored
  // the current access info, so we are done
  if (LIKELY(store_word == 0))
    return;
  // choose a random candidate slot and replace it
  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
  StatInc(thr, StatShadowReplace);
  return;
 RACE:
  HandleRace(thr, shadow_mem, cur, old);
  return;
}

void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int size, bool kAccessIsWrite, bool kIsAtomic) {
  while (size) {
    int size1 = 1;
    int kAccessSizeLog = kSizeLog1;
    if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
      size1 = 8;
      kAccessSizeLog = kSizeLog8;
    } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
      size1 = 4;
      kAccessSizeLog = kSizeLog4;
    } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
      size1 = 2;
      kAccessSizeLog = kSizeLog2;
    }
    MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
    addr += size1;
    size -= size1;
  }
}

ALWAYS_INLINE
bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  Shadow cur(a);
  for (uptr i = 0; i < kShadowCnt; i++) {
    Shadow old(LoadShadow(&s[i]));
    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
        old.TidWithIgnore() == cur.TidWithIgnore() &&
        old.epoch() > sync_epoch &&
        old.IsAtomic() == cur.IsAtomic() &&
        old.IsRead() <= cur.IsRead())
      return true;
  }
  return false;
}
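
// ContainsSameAccessSlow is the reference implementation of the "same access"
// filter: if some shadow slot already records an access to the same range by
// the same thread that is (roughly) at least as strong as the current one and
// newer than the thread's last synchronization epoch, the shadow word does
// not need to be updated again. The SSE3 variant below performs the same
// check over all four slots at once.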
#if defined(__SSE3__)
#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  // This is an optimized version of ContainsSameAccessSlow.
  // load current access into access[0:63]
  const m128 access = _mm_cvtsi64_si128(a);
  // duplicate high part of access in addr0:
  // addr0[0:31]   = access[32:63]
  // addr0[32:63]  = access[32:63]
  // addr0[64:95]  = access[32:63]
  // addr0[96:127] = access[32:63]
  const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
  // load 4 shadow slots
  const m128 shadow0 = _mm_load_si128((__m128i*)s);
  const m128 shadow1 = _mm_load_si128((__m128i*)s + 1);
  // load high parts of 4 shadow slots into addr_vect:
  // addr_vect[0:31]   = shadow0[32:63]
  // addr_vect[32:63]  = shadow0[96:127]
  // addr_vect[64:95]  = shadow1[32:63]
  // addr_vect[96:127] = shadow1[96:127]
  m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
  if (!is_write) {
    // set IsRead bit in addr_vect
    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
    const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
    addr_vect = _mm_or_si128(addr_vect, rw_mask);
  }
  // addr0 == addr_vect?
  const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
  // epoch1[0:63] = sync_epoch
  const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
  // epoch[0:31]   = sync_epoch[0:31]
  // epoch[32:63]  = sync_epoch[0:31]
  // epoch[64:95]  = sync_epoch[0:31]
  // epoch[96:127] = sync_epoch[0:31]
  const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
  // load low parts of shadow cell epochs into epoch_vect:
  // epoch_vect[0:31]   = shadow0[0:31]
  // epoch_vect[32:63]  = shadow0[64:95]
  // epoch_vect[64:95]  = shadow1[0:31]
  // epoch_vect[96:127] = shadow1[64:95]
  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
  // epoch_vect > sync_epoch?
  const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
  // addr_res & epoch_res
  const m128 res = _mm_and_si128(addr_res, epoch_res);
  // mask[0] = res[7]
  // mask[1] = res[15]
  // ...
  // mask[15] = res[127]
  const int mask = _mm_movemask_epi8(res);
  return mask != 0;
}
#endif

ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if defined(__SSE3__)
  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
  // NOTE: this check can fail if the shadow is concurrently mutated
  // by other threads. But it still can be useful if you modify
  // ContainsSameAccessFast and want to ensure that it's not completely broken.
  // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
  return res;
#else
  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
#endif
}
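
// MemoryAccess below is the hot path behind the compiler-inserted
// instrumentation entry points. As a rough illustration (the actual thunks
// live in tsan_interface_inl.h, included at the bottom of this file), a
// 4-byte store reaches it approximately as:
//   __tsan_write4(addr)
//     -> MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, kSizeLog4,
//                     /*kAccessIsWrite=*/true, /*kIsAtomic=*/false);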
ALWAYS_INLINE USED
void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
  u64 *shadow_mem = (u64*)MemToShadow(addr);
  DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
      " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
      (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
      (uptr)shadow_mem[0], (uptr)shadow_mem[1],
      (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsShadowMem((uptr)shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
    DCHECK(IsShadowMem((uptr)shadow_mem));
  }
#endif

  if (kCppMode && *shadow_mem == kShadowRodata) {
    // Access to .rodata section, no races here.
    // Measurements show that it can be 10-20% of all memory accesses.
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopRodata);
    return;
  }

  FastState fast_state = thr->fast_state;
  if (fast_state.GetIgnoreBit()) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopIgnored);
    return;
  }

  Shadow cur(fast_state);
  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
  cur.SetWrite(kAccessIsWrite);
  cur.SetAtomic(kIsAtomic);

  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
      thr->fast_synch_epoch, kAccessIsWrite))) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopSame);
    return;
  }

  if (kCollectHistory) {
    fast_state.IncrementEpoch();
    thr->fast_state = fast_state;
    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
    cur.IncrementEpoch();
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}

// Called by MemoryAccessRange in tsan_rtl_thread.cc
ALWAYS_INLINE USED
void MemoryAccessImpl(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
      thr->fast_synch_epoch, kAccessIsWrite))) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopSame);
    return;
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}

static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
                           u64 val) {
  (void)thr;
  (void)pc;
  if (size == 0)
    return;
  // FIXME: fix me.
  uptr offset = addr % kShadowCell;
  if (offset) {
    offset = kShadowCell - offset;
    if (size <= offset)
      return;
    addr += offset;
    size -= offset;
  }
  DCHECK_EQ(addr % 8, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need to reset the whole range.
  size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
  // so we do it only for C/C++.
  if (kGoMode || size < common_flags()->clear_shadow_mmap_threshold) {
    u64 *p = (u64*)MemToShadow(addr);
    CHECK(IsShadowMem((uptr)p));
    CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
    // FIXME: may overwrite a part outside the region
    for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
      p[i++] = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        p[i++] = 0;
    }
  } else {
    // The region is big, reset only beginning and end.
    const uptr kPageSize = GetPageSizeCached();
    u64 *begin = (u64*)MemToShadow(addr);
    u64 *end = begin + size / kShadowCell * kShadowCnt;
    u64 *p = begin;
    // Set at least the first kPageSize/2 of shadow and continue up to a page
    // boundary.
    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
    // Reset middle part.
    u64 *p1 = p;
    p = RoundDown(end, kPageSize);
    UnmapOrDie((void*)p1, (uptr)p - (uptr)p1);
    MmapFixedNoReserve((uptr)p1, (uptr)p - (uptr)p1);
    // Set the ending.
    while (p < end) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
  }
}

void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  MemoryRangeSet(thr, pc, addr, size, 0);
}
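
// MemoryRangeFreed marks a freed range in shadow memory so that subsequent
// unsynchronized accesses race with the free; MemoryRangeImitateWrite
// similarly imitates a write to the whole range, but without the freed
// marker.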
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  // Processing more than 1k (4k of shadow) is expensive,
  // can cause excessive memory consumption (user does not necessarily touch
  // the whole range) and most likely unnecessary.
  if (size > 1024)
    size = 1024;
  CHECK_EQ(thr->is_freeing, false);
  thr->is_freeing = true;
  MemoryAccessRange(thr, pc, addr, size, true);
  thr->is_freeing = false;
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.MarkAsFreed();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}

void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}

ALWAYS_INLINE USED
void FuncEntry(ThreadState *thr, uptr pc) {
  StatInc(thr, StatFuncEnter);
  DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
  }

  // Shadow stack maintenance can be replaced with
  // stack unwinding during trace switch (which presumably must be faster).
  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
  if (thr->shadow_stack_pos == thr->shadow_stack_end)
    GrowShadowStack(thr);
#endif
  thr->shadow_stack_pos[0] = pc;
  thr->shadow_stack_pos++;
}

ALWAYS_INLINE USED
void FuncExit(ThreadState *thr) {
  StatInc(thr, StatFuncExit);
  DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
  }

  DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
  thr->shadow_stack_pos--;
}

void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
  thr->ignore_reads_and_writes++;
  CHECK_GT(thr->ignore_reads_and_writes, 0);
  thr->fast_state.SetIgnoreBit();
#ifndef SANITIZER_GO
  if (!ctx->after_multithreaded_fork)
    thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}

void ThreadIgnoreEnd(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
  thr->ignore_reads_and_writes--;
  CHECK_GE(thr->ignore_reads_and_writes, 0);
  if (thr->ignore_reads_and_writes == 0) {
    thr->fast_state.ClearIgnoreBit();
#ifndef SANITIZER_GO
    thr->mop_ignore_set.Reset();
#endif
  }
}

void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
  thr->ignore_sync++;
  CHECK_GT(thr->ignore_sync, 0);
#ifndef SANITIZER_GO
  if (!ctx->after_multithreaded_fork)
    thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}

void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
  thr->ignore_sync--;
  CHECK_GE(thr->ignore_sync, 0);
#ifndef SANITIZER_GO
  if (thr->ignore_sync == 0)
    thr->sync_ignore_set.Reset();
#endif
}
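
// The ignore counters above nest, so Begin/End are normally used in matched
// pairs, e.g. (illustrative sketch only) around a region whose accesses
// should not be race-checked:
//   ThreadIgnoreBegin(thr, pc);
//   // ... memory accesses here are ignored by the race detector ...
//   ThreadIgnoreEnd(thr, pc);
// Typical callers include the AnnotateIgnore* dynamic annotations; the fork
// handler above deliberately leaves the counters raised.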
bool MD5Hash::operator==(const MD5Hash &other) const {
  return hash[0] == other.hash[0] && hash[1] == other.hash[1];
}

#if SANITIZER_DEBUG
void build_consistency_debug() {}
#else
void build_consistency_release() {}
#endif

#if TSAN_COLLECT_STATS
void build_consistency_stats() {}
#else
void build_consistency_nostats() {}
#endif

}  // namespace __tsan

#ifndef SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#include "tsan_interface_inl.h"
#endif