1//===-- ThreadSanitizer.cpp - race detector -------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is a part of ThreadSanitizer, a race detector. 11// 12// The tool is under development, for the details about previous versions see 13// http://code.google.com/p/data-race-test 14// 15// The instrumentation phase is quite simple: 16// - Insert calls to run-time library before every memory access. 17// - Optimizations may apply to avoid instrumenting some of the accesses. 18// - Insert calls at function entry/exit. 19// The rest is handled by the run-time library. 20//===----------------------------------------------------------------------===// 21 22#define DEBUG_TYPE "tsan" 23 24#include "BlackList.h" 25#include "llvm/Function.h" 26#include "llvm/IRBuilder.h" 27#include "llvm/Intrinsics.h" 28#include "llvm/LLVMContext.h" 29#include "llvm/Metadata.h" 30#include "llvm/Module.h" 31#include "llvm/Type.h" 32#include "llvm/ADT/SmallSet.h" 33#include "llvm/ADT/SmallString.h" 34#include "llvm/ADT/SmallVector.h" 35#include "llvm/ADT/Statistic.h" 36#include "llvm/ADT/StringExtras.h" 37#include "llvm/Support/CommandLine.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Support/MathExtras.h" 40#include "llvm/Support/raw_ostream.h" 41#include "llvm/Target/TargetData.h" 42#include "llvm/Transforms/Instrumentation.h" 43#include "llvm/Transforms/Utils/BasicBlockUtils.h" 44#include "llvm/Transforms/Utils/ModuleUtils.h" 45 46using namespace llvm; 47 48static cl::opt<std::string> ClBlackListFile("tsan-blacklist", 49 cl::desc("Blacklist file"), cl::Hidden); 50 51STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); 52STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); 53STATISTIC(NumOmittedReadsBeforeWrite, 54 "Number of reads ignored due to following writes"); 55STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); 56STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); 57STATISTIC(NumOmittedReadsFromConstantGlobals, 58 "Number of reads from constant globals"); 59STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); 60 61namespace { 62 63/// ThreadSanitizer: instrument the code in module to find races. 64struct ThreadSanitizer : public FunctionPass { 65 ThreadSanitizer(); 66 const char *getPassName() const; 67 bool runOnFunction(Function &F); 68 bool doInitialization(Module &M); 69 static char ID; // Pass identification, replacement for typeid. 70 71 private: 72 bool instrumentLoadOrStore(Instruction *I); 73 bool instrumentAtomic(Instruction *I); 74 void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, 75 SmallVectorImpl<Instruction*> &All); 76 bool addrPointsToConstantData(Value *Addr); 77 int getMemoryAccessFuncIndex(Value *Addr); 78 79 TargetData *TD; 80 OwningPtr<BlackList> BL; 81 IntegerType *OrdTy; 82 // Callbacks to run-time library are computed in doInitialization. 83 Function *TsanFuncEntry; 84 Function *TsanFuncExit; 85 // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 86 static const size_t kNumberOfAccessSizes = 5; 87 Function *TsanRead[kNumberOfAccessSizes]; 88 Function *TsanWrite[kNumberOfAccessSizes]; 89 Function *TsanAtomicLoad[kNumberOfAccessSizes]; 90 Function *TsanAtomicStore[kNumberOfAccessSizes]; 91 Function *TsanVptrUpdate; 92}; 93} // namespace 94 95char ThreadSanitizer::ID = 0; 96INITIALIZE_PASS(ThreadSanitizer, "tsan", 97 "ThreadSanitizer: detects data races.", 98 false, false) 99 100const char *ThreadSanitizer::getPassName() const { 101 return "ThreadSanitizer"; 102} 103 104ThreadSanitizer::ThreadSanitizer() 105 : FunctionPass(ID), 106 TD(NULL) { 107} 108 109FunctionPass *llvm::createThreadSanitizerPass() { 110 return new ThreadSanitizer(); 111} 112 113static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { 114 if (Function *F = dyn_cast<Function>(FuncOrBitcast)) 115 return F; 116 FuncOrBitcast->dump(); 117 report_fatal_error("ThreadSanitizer interface function redefined"); 118} 119 120bool ThreadSanitizer::doInitialization(Module &M) { 121 TD = getAnalysisIfAvailable<TargetData>(); 122 if (!TD) 123 return false; 124 BL.reset(new BlackList(ClBlackListFile)); 125 126 // Always insert a call to __tsan_init into the module's CTORs. 127 IRBuilder<> IRB(M.getContext()); 128 Value *TsanInit = M.getOrInsertFunction("__tsan_init", 129 IRB.getVoidTy(), NULL); 130 appendToGlobalCtors(M, cast<Function>(TsanInit), 0); 131 132 // Initialize the callbacks. 133 TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( 134 "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); 135 TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction( 136 "__tsan_func_exit", IRB.getVoidTy(), NULL)); 137 OrdTy = IRB.getInt32Ty(); 138 for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { 139 const size_t ByteSize = 1 << i; 140 const size_t BitSize = ByteSize * 8; 141 SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); 142 TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction( 143 ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); 144 145 SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); 146 TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( 147 WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); 148 149 Type *Ty = Type::getIntNTy(M.getContext(), BitSize); 150 Type *PtrTy = Ty->getPointerTo(); 151 SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + 152 "_load"); 153 TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction( 154 AtomicLoadName, Ty, PtrTy, OrdTy, NULL)); 155 156 SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) + 157 "_store"); 158 TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( 159 AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, 160 NULL)); 161 } 162 TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( 163 "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), 164 IRB.getInt8PtrTy(), NULL)); 165 return true; 166} 167 168static bool isVtableAccess(Instruction *I) { 169 if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { 170 if (Tag->getNumOperands() < 1) return false; 171 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { 172 if (Tag1->getString() == "vtable pointer") return true; 173 } 174 } 175 return false; 176} 177 178bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { 179 // If this is a GEP, just analyze its pointer operand. 180 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) 181 Addr = GEP->getPointerOperand(); 182 183 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 184 if (GV->isConstant()) { 185 // Reads from constant globals can not race with any writes. 186 NumOmittedReadsFromConstantGlobals++; 187 return true; 188 } 189 } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) { 190 if (isVtableAccess(L)) { 191 // Reads from a vtable pointer can not race with any writes. 192 NumOmittedReadsFromVtable++; 193 return true; 194 } 195 } 196 return false; 197} 198 199// Instrumenting some of the accesses may be proven redundant. 200// Currently handled: 201// - read-before-write (within same BB, no calls between) 202// 203// We do not handle some of the patterns that should not survive 204// after the classic compiler optimizations. 205// E.g. two reads from the same temp should be eliminated by CSE, 206// two writes should be eliminated by DSE, etc. 207// 208// 'Local' is a vector of insns within the same BB (no calls between). 209// 'All' is a vector of insns that will be instrumented. 210void ThreadSanitizer::chooseInstructionsToInstrument( 211 SmallVectorImpl<Instruction*> &Local, 212 SmallVectorImpl<Instruction*> &All) { 213 SmallSet<Value*, 8> WriteTargets; 214 // Iterate from the end. 215 for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), 216 E = Local.rend(); It != E; ++It) { 217 Instruction *I = *It; 218 if (StoreInst *Store = dyn_cast<StoreInst>(I)) { 219 WriteTargets.insert(Store->getPointerOperand()); 220 } else { 221 LoadInst *Load = cast<LoadInst>(I); 222 Value *Addr = Load->getPointerOperand(); 223 if (WriteTargets.count(Addr)) { 224 // We will write to this temp, so no reason to analyze the read. 225 NumOmittedReadsBeforeWrite++; 226 continue; 227 } 228 if (addrPointsToConstantData(Addr)) { 229 // Addr points to some constant data -- it can not race with any writes. 230 continue; 231 } 232 } 233 All.push_back(I); 234 } 235 Local.clear(); 236} 237 238static bool isAtomic(Instruction *I) { 239 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 240 return LI->isAtomic() && LI->getSynchScope() == CrossThread; 241 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 242 return SI->isAtomic() && SI->getSynchScope() == CrossThread; 243 if (isa<AtomicRMWInst>(I)) 244 return true; 245 if (isa<AtomicCmpXchgInst>(I)) 246 return true; 247 if (FenceInst *FI = dyn_cast<FenceInst>(I)) 248 return FI->getSynchScope() == CrossThread; 249 return false; 250} 251 252bool ThreadSanitizer::runOnFunction(Function &F) { 253 if (!TD) return false; 254 if (BL->isIn(F)) return false; 255 SmallVector<Instruction*, 8> RetVec; 256 SmallVector<Instruction*, 8> AllLoadsAndStores; 257 SmallVector<Instruction*, 8> LocalLoadsAndStores; 258 SmallVector<Instruction*, 8> AtomicAccesses; 259 bool Res = false; 260 bool HasCalls = false; 261 262 // Traverse all instructions, collect loads/stores/returns, check for calls. 263 for (Function::iterator FI = F.begin(), FE = F.end(); 264 FI != FE; ++FI) { 265 BasicBlock &BB = *FI; 266 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); 267 BI != BE; ++BI) { 268 if (isAtomic(BI)) 269 AtomicAccesses.push_back(BI); 270 else if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) 271 LocalLoadsAndStores.push_back(BI); 272 else if (isa<ReturnInst>(BI)) 273 RetVec.push_back(BI); 274 else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 275 HasCalls = true; 276 chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); 277 } 278 } 279 chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); 280 } 281 282 // We have collected all loads and stores. 283 // FIXME: many of these accesses do not need to be checked for races 284 // (e.g. variables that do not escape, etc). 285 286 // Instrument memory accesses. 287 for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { 288 Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); 289 } 290 291 // Instrument atomic memory accesses. 292 for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) { 293 Res |= instrumentAtomic(AtomicAccesses[i]); 294 } 295 296 // Instrument function entry/exit points if there were instrumented accesses. 297 if (Res || HasCalls) { 298 IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); 299 Value *ReturnAddress = IRB.CreateCall( 300 Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), 301 IRB.getInt32(0)); 302 IRB.CreateCall(TsanFuncEntry, ReturnAddress); 303 for (size_t i = 0, n = RetVec.size(); i < n; ++i) { 304 IRBuilder<> IRBRet(RetVec[i]); 305 IRBRet.CreateCall(TsanFuncExit); 306 } 307 Res = true; 308 } 309 return Res; 310} 311 312bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { 313 IRBuilder<> IRB(I); 314 bool IsWrite = isa<StoreInst>(*I); 315 Value *Addr = IsWrite 316 ? cast<StoreInst>(I)->getPointerOperand() 317 : cast<LoadInst>(I)->getPointerOperand(); 318 int Idx = getMemoryAccessFuncIndex(Addr); 319 if (Idx < 0) 320 return false; 321 if (IsWrite && isVtableAccess(I)) { 322 DEBUG(dbgs() << " VPTR : " << *I << "\n"); 323 Value *StoredValue = cast<StoreInst>(I)->getValueOperand(); 324 // StoredValue does not necessary have a pointer type. 325 if (isa<IntegerType>(StoredValue->getType())) 326 StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy()); 327 // Call TsanVptrUpdate. 328 IRB.CreateCall2(TsanVptrUpdate, 329 IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), 330 IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); 331 NumInstrumentedVtableWrites++; 332 return true; 333 } 334 Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; 335 IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); 336 if (IsWrite) NumInstrumentedWrites++; 337 else NumInstrumentedReads++; 338 return true; 339} 340 341static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { 342 uint32_t v = 0; 343 switch (ord) { 344 case NotAtomic: assert(false); 345 case Unordered: // Fall-through. 346 case Monotonic: v = 1 << 0; break; 347 // case Consume: v = 1 << 1; break; // Not specified yet. 348 case Acquire: v = 1 << 2; break; 349 case Release: v = 1 << 3; break; 350 case AcquireRelease: v = 1 << 4; break; 351 case SequentiallyConsistent: v = 1 << 5; break; 352 } 353 return IRB->getInt32(v); 354} 355 356bool ThreadSanitizer::instrumentAtomic(Instruction *I) { 357 IRBuilder<> IRB(I); 358 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 359 Value *Addr = LI->getPointerOperand(); 360 int Idx = getMemoryAccessFuncIndex(Addr); 361 if (Idx < 0) 362 return false; 363 const size_t ByteSize = 1 << Idx; 364 const size_t BitSize = ByteSize * 8; 365 Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); 366 Type *PtrTy = Ty->getPointerTo(); 367 Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), 368 createOrdering(&IRB, LI->getOrdering())}; 369 CallInst *C = CallInst::Create(TsanAtomicLoad[Idx], 370 ArrayRef<Value*>(Args)); 371 ReplaceInstWithInst(I, C); 372 373 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 374 Value *Addr = SI->getPointerOperand(); 375 int Idx = getMemoryAccessFuncIndex(Addr); 376 if (Idx < 0) 377 return false; 378 const size_t ByteSize = 1 << Idx; 379 const size_t BitSize = ByteSize * 8; 380 Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); 381 Type *PtrTy = Ty->getPointerTo(); 382 Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), 383 IRB.CreateIntCast(SI->getValueOperand(), Ty, false), 384 createOrdering(&IRB, SI->getOrdering())}; 385 CallInst *C = CallInst::Create(TsanAtomicStore[Idx], 386 ArrayRef<Value*>(Args)); 387 ReplaceInstWithInst(I, C); 388 } else if (isa<AtomicRMWInst>(I)) { 389 // FIXME: Not yet supported. 390 } else if (isa<AtomicCmpXchgInst>(I)) { 391 // FIXME: Not yet supported. 392 } else if (isa<FenceInst>(I)) { 393 // FIXME: Not yet supported. 394 } 395 return true; 396} 397 398int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) { 399 Type *OrigPtrTy = Addr->getType(); 400 Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); 401 assert(OrigTy->isSized()); 402 uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); 403 if (TypeSize != 8 && TypeSize != 16 && 404 TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { 405 NumAccessesWithBadSize++; 406 // Ignore all unusual sizes. 407 return -1; 408 } 409 size_t Idx = CountTrailingZeros_32(TypeSize / 8); 410 assert(Idx < kNumberOfAccessSizes); 411 return Idx; 412} 413