AddressSanitizer.cpp revision 18c7f80b3e83ab584bd8572695a3cde8bafd9d3c
1//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of AddressSanitizer, an address sanity checker.
11// Details of the algorithm:
12//  http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "asan"
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/OwningPtr.h"
20#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/Function.h"
25#include "llvm/IntrinsicInst.h"
26#include "llvm/LLVMContext.h"
27#include "llvm/Module.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/DataTypes.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/IRBuilder.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include "llvm/Support/Regex.h"
34#include "llvm/Support/raw_ostream.h"
35#include "llvm/Support/system_error.h"
36#include "llvm/Target/TargetData.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/Instrumentation.h"
39#include "llvm/Transforms/Utils/BasicBlockUtils.h"
40#include "llvm/Transforms/Utils/ModuleUtils.h"
41#include "llvm/Type.h"
42
43#include <string>
44#include <algorithm>
45
46using namespace llvm;
47
// Default parameters of the shadow mapping (see memToShadow):
//   Shadow = (Mem >> Scale) | Offset
// The 32-bit or 64-bit offset is selected in runOnModule from the target
// pointer size.
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;

// Constants for the stack instrumentation. They are not referenced in the
// part of the file that precedes poisonStackInFunction.
static const size_t kMaxStackMallocSize = 1 << 16;  // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;

// Names of the functions and globals that the pass creates or references.
// The pointers themselves are const: these are fixed names, and nothing
// should be able to reseat them at run time.
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
// Priority used for both the module ctor and the module dtor
// (despite the "CtorAndCtor" spelling of the name).
static const int   kAsanCtorAndCtorPriority = 1;
// Prefix of the report function; generateCrashCode appends "load"/"store"
// and the access size in bytes.
static const char *const kAsanReportErrorTemplate = "__asan_report_";
static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *const kAsanUnregisterGlobalsName =
    "__asan_unregister_globals";
static const char *const kAsanInitName = "__asan_init";
static const char *const kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *const kAsanMappingScaleName = "__asan_mapping_scale";
static const char *const kAsanStackMallocName = "__asan_stack_malloc";
static const char *const kAsanStackFreeName = "__asan_stack_free";

// Shadow bytes written by PoisonStack for the different kinds of stack
// redzones.
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
static const int kAsanStackRightRedzoneMagic = 0xf3;
static const int kAsanStackPartialRedzoneMagic = 0xf4;
72
73// Command-line flags.
74
75// This flag may need to be replaced with -f[no-]asan-reads.
76static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
77       cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
78static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
79       cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
80// This flag may need to be replaced with -f[no]asan-stack.
81static cl::opt<bool> ClStack("asan-stack",
82       cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
83// This flag may need to be replaced with -f[no]asan-use-after-return.
84static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
85       cl::desc("Check return-after-free"), cl::Hidden, cl::init(false));
86// This flag may need to be replaced with -f[no]asan-globals.
87static cl::opt<bool> ClGlobals("asan-globals",
88       cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
89static cl::opt<bool> ClMemIntrin("asan-memintrin",
90       cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
91// This flag may need to be replaced with -fasan-blacklist.
92static cl::opt<std::string>  ClBlackListFile("asan-blacklist",
93       cl::desc("File containing the list of functions to ignore "
94                "during instrumentation"), cl::Hidden);
95
96// These flags allow to change the shadow mapping.
97// The shadow mapping looks like
98//    Shadow = (Mem >> scale) + (1 << offset_log)
99static cl::opt<int> ClMappingScale("asan-mapping-scale",
100       cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
101static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
102       cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
103
104// Optimization flags. Not user visible, used mostly for testing
105// and benchmarking the tool.
106static cl::opt<bool> ClOpt("asan-opt",
107       cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
108static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
109       cl::desc("Instrument the same temp just once"), cl::Hidden,
110       cl::init(true));
111static cl::opt<bool> ClOptGlobals("asan-opt-globals",
112       cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
113
114// Debug flags.
115static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
116                            cl::init(0));
117static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
118                                 cl::Hidden, cl::init(0));
119static cl::opt<std::string> ClDebugFunc("asan-debug-func",
120                                        cl::Hidden, cl::desc("Debug func"));
121static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
122                               cl::Hidden, cl::init(-1));
123static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
124                               cl::Hidden, cl::init(-1));
125
126namespace {
127
128// Blacklisted functions are not instrumented.
129// The blacklist file contains one or more lines like this:
130// ---
131// fun:FunctionWildCard
132// ---
133// This is similar to the "ignore" feature of ThreadSanitizer.
134// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
135class BlackList {
136 public:
137  BlackList(const std::string &Path);
138  bool isIn(const Function &F);
139 private:
140  Regex *Functions;
141};
142
143/// AddressSanitizer: instrument the code in module to find memory bugs.
144struct AddressSanitizer : public ModulePass {
145  AddressSanitizer();
146  virtual const char *getPassName() const;
147  void instrumentMop(Instruction *I);
148  void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
149                         Value *Addr, uint32_t TypeSize, bool IsWrite);
150  Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr,
151                                 bool IsWrite, uint32_t TypeSize);
152  bool instrumentMemIntrinsic(MemIntrinsic *MI);
153  void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
154                                  Value *Size,
155                                   Instruction *InsertBefore, bool IsWrite);
156  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
157  bool handleFunction(Module &M, Function &F);
158  bool maybeInsertAsanInitAtFunctionEntry(Function &F);
159  bool poisonStackInFunction(Module &M, Function &F);
160  virtual bool runOnModule(Module &M);
161  bool insertGlobalRedzones(Module &M);
162  BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp);
163  static char ID;  // Pass identification, replacement for typeid
164
165 private:
166
167  uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
168    Type *Ty = AI->getAllocatedType();
169    uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8;
170    return SizeInBytes;
171  }
172  uint64_t getAlignedSize(uint64_t SizeInBytes) {
173    return ((SizeInBytes + RedzoneSize - 1)
174            / RedzoneSize) * RedzoneSize;
175  }
176  uint64_t getAlignedAllocaSize(AllocaInst *AI) {
177    uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
178    return getAlignedSize(SizeInBytes);
179  }
180
181  void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
182                   Value *ShadowBase, bool DoPoison);
183  bool LooksLikeCodeInBug11395(Instruction *I);
184
185  Module      *CurrentModule;
186  LLVMContext *C;
187  TargetData *TD;
188  uint64_t MappingOffset;
189  int MappingScale;
190  size_t RedzoneSize;
191  int LongSize;
192  Type *IntptrTy;
193  Type *IntptrPtrTy;
194  Function *AsanCtorFunction;
195  Function *AsanInitFunction;
196  Instruction *CtorInsertBefore;
197  OwningPtr<BlackList> BL;
198};
199}  // namespace
200
201char AddressSanitizer::ID = 0;
202INITIALIZE_PASS(AddressSanitizer, "asan",
203    "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
204    false, false)
205AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
206ModulePass *llvm::createAddressSanitizerPass() {
207  return new AddressSanitizer();
208}
209
210const char *AddressSanitizer::getPassName() const {
211  return "AddressSanitizer";
212}
213
214// Create a constant for Str so that we can pass it to the run-time lib.
215static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
216  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
217  return new GlobalVariable(M, StrConst->getType(), true,
218                            GlobalValue::PrivateLinkage, StrConst, "");
219}
220
221// Split the basic block and insert an if-then code.
222// Before:
223//   Head
224//   SplitBefore
225//   Tail
226// After:
227//   Head
228//   if (Cmp)
229//     NewBasicBlock
230//   SplitBefore
231//   Tail
232//
233// Returns the NewBasicBlock's terminator.
234BranchInst *AddressSanitizer::splitBlockAndInsertIfThen(
235    Instruction *SplitBefore, Value *Cmp) {
236  BasicBlock *Head = SplitBefore->getParent();
237  BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
238  TerminatorInst *HeadOldTerm = Head->getTerminator();
239  BasicBlock *NewBasicBlock =
240      BasicBlock::Create(*C, "", Head->getParent());
241  BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock,
242                                               /*ifFalse*/Tail,
243                                               Cmp);
244  ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
245
246  BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock);
247  return CheckTerm;
248}
249
250Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
251  // Shadow >> scale
252  Shadow = IRB.CreateLShr(Shadow, MappingScale);
253  if (MappingOffset == 0)
254    return Shadow;
255  // (Shadow >> scale) | offset
256  return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy,
257                                               MappingOffset));
258}
259
260void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,
261    Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
262  // Check the first byte.
263  {
264    IRBuilder<> IRB(InsertBefore);
265    instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
266  }
267  // Check the last byte.
268  {
269    IRBuilder<> IRB(InsertBefore);
270    Value *SizeMinusOne = IRB.CreateSub(
271        Size, ConstantInt::get(Size->getType(), 1));
272    SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
273    Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
274    Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
275    instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
276  }
277}
278
279// Instrument memset/memmove/memcpy
280bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
281  Value *Dst = MI->getDest();
282  MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
283  Value *Src = MemTran ? MemTran->getSource() : NULL;
284  Value *Length = MI->getLength();
285
286  Constant *ConstLength = dyn_cast<Constant>(Length);
287  Instruction *InsertBefore = MI;
288  if (ConstLength) {
289    if (ConstLength->isNullValue()) return false;
290  } else {
291    // The size is not a constant so it could be zero -- check at run-time.
292    IRBuilder<> IRB(InsertBefore);
293
294    Value *Cmp = IRB.CreateICmpNE(Length,
295                                   Constant::getNullValue(Length->getType()));
296    InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp);
297  }
298
299  instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
300  if (Src)
301    instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
302  return true;
303}
304
305static Value *getLDSTOperand(Instruction *I) {
306  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
307    return LI->getPointerOperand();
308  }
309  return cast<StoreInst>(*I).getPointerOperand();
310}
311
312void AddressSanitizer::instrumentMop(Instruction *I) {
313  int IsWrite = isa<StoreInst>(*I);
314  Value *Addr = getLDSTOperand(I);
315  if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
316    // We are accessing a global scalar variable. Nothing to catch here.
317    return;
318  }
319  Type *OrigPtrTy = Addr->getType();
320  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
321
322  assert(OrigTy->isSized());
323  uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
324
325  if (TypeSize != 8  && TypeSize != 16 &&
326      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
327    // Ignore all unusual sizes.
328    return;
329  }
330
331  IRBuilder<> IRB(I);
332  instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
333}
334
335Instruction *AddressSanitizer::generateCrashCode(
336    IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) {
337  // IsWrite and TypeSize are encoded in the function name.
338  std::string FunctionName = std::string(kAsanReportErrorTemplate) +
339      (IsWrite ? "store" : "load") + itostr(TypeSize / 8);
340  Value *ReportWarningFunc = CurrentModule->getOrInsertFunction(
341      FunctionName, IRB.getVoidTy(), IntptrTy, NULL);
342  CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr);
343  Call->setDoesNotReturn();
344  return Call;
345}
346
347void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
348                                         IRBuilder<> &IRB, Value *Addr,
349                                         uint32_t TypeSize, bool IsWrite) {
350  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
351
352  Type *ShadowTy  = IntegerType::get(
353      *C, std::max(8U, TypeSize >> MappingScale));
354  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
355  Value *ShadowPtr = memToShadow(AddrLong, IRB);
356  Value *CmpVal = Constant::getNullValue(ShadowTy);
357  Value *ShadowValue = IRB.CreateLoad(
358      IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
359
360  Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
361
362  Instruction *CheckTerm = splitBlockAndInsertIfThen(
363      cast<Instruction>(Cmp)->getNextNode(), Cmp);
364  IRBuilder<> IRB2(CheckTerm);
365
366  size_t Granularity = 1 << MappingScale;
367  if (TypeSize < 8 * Granularity) {
368    // Addr & (Granularity - 1)
369    Value *Lower3Bits = IRB2.CreateAnd(
370        AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
371    // (Addr & (Granularity - 1)) + size - 1
372    Value *LastAccessedByte = IRB2.CreateAdd(
373        Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
374    // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
375    LastAccessedByte = IRB2.CreateIntCast(
376        LastAccessedByte, IRB.getInt8Ty(), false);
377    // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
378    Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);
379
380    CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2);
381  }
382
383  IRBuilder<> IRB1(CheckTerm);
384  Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
385  Crash->setDebugLoc(OrigIns->getDebugLoc());
386  ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
387}
388
389// This function replaces all global variables with new variables that have
390// trailing redzones. It also creates a function that poisons
391// redzones and inserts this function into llvm.global_ctors.
392bool AddressSanitizer::insertGlobalRedzones(Module &M) {
393  SmallVector<GlobalVariable *, 16> GlobalsToChange;
394
395  for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
396       E = M.getGlobalList().end(); G != E; ++G) {
397    Type *Ty = cast<PointerType>(G->getType())->getElementType();
398    DEBUG(dbgs() << "GLOBAL: " << *G);
399
400    if (!Ty->isSized()) continue;
401    if (!G->hasInitializer()) continue;
402    // Touch only those globals that will not be defined in other modules.
403    // Don't handle ODR type linkages since other modules may be built w/o asan.
404    if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
405        G->getLinkage() != GlobalVariable::PrivateLinkage &&
406        G->getLinkage() != GlobalVariable::InternalLinkage)
407      continue;
408    // Two problems with thread-locals:
409    //   - The address of the main thread's copy can't be computed at link-time.
410    //   - Need to poison all copies, not just the main thread's one.
411    if (G->isThreadLocal())
412      continue;
413    // For now, just ignore this Alloca if the alignment is large.
414    if (G->getAlignment() > RedzoneSize) continue;
415
416    // Ignore all the globals with the names starting with "\01L_OBJC_".
417    // Many of those are put into the .cstring section. The linker compresses
418    // that section by removing the spare \0s after the string terminator, so
419    // our redzones get broken.
420    if ((G->getName().find("\01L_OBJC_") == 0) ||
421        (G->getName().find("\01l_OBJC_") == 0)) {
422      DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
423      continue;
424    }
425
426    if (G->hasSection()) {
427      StringRef Section(G->getSection());
428      // Ignore the globals from the __OBJC section. The ObjC runtime assumes
429      // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
430      // them.
431      if ((Section.find("__OBJC,") == 0) ||
432          (Section.find("__DATA, __objc_") == 0)) {
433        DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
434        continue;
435      }
436      // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
437      // Constant CFString instances are compiled in the following way:
438      //  -- the string buffer is emitted into
439      //     __TEXT,__cstring,cstring_literals
440      //  -- the constant NSConstantString structure referencing that buffer
441      //     is placed into __DATA,__cfstring
442      // Therefore there's no point in placing redzones into __DATA,__cfstring.
443      // Moreover, it causes the linker to crash on OS X 10.7
444      if (Section.find("__DATA,__cfstring") == 0) {
445        DEBUG(dbgs() << "Ignoring CFString: " << *G);
446        continue;
447      }
448    }
449
450    GlobalsToChange.push_back(G);
451  }
452
453  size_t n = GlobalsToChange.size();
454  if (n == 0) return false;
455
456  // A global is described by a structure
457  //   size_t beg;
458  //   size_t size;
459  //   size_t size_with_redzone;
460  //   const char *name;
461  // We initialize an array of such structures and pass it to a run-time call.
462  StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
463                                               IntptrTy, IntptrTy, NULL);
464  SmallVector<Constant *, 16> Initializers(n);
465
466  IRBuilder<> IRB(CtorInsertBefore);
467
468  for (size_t i = 0; i < n; i++) {
469    GlobalVariable *G = GlobalsToChange[i];
470    PointerType *PtrTy = cast<PointerType>(G->getType());
471    Type *Ty = PtrTy->getElementType();
472    uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8;
473    uint64_t RightRedzoneSize = RedzoneSize +
474        (RedzoneSize - (SizeInBytes % RedzoneSize));
475    Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
476
477    StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
478    Constant *NewInitializer = ConstantStruct::get(
479        NewTy, G->getInitializer(),
480        Constant::getNullValue(RightRedZoneTy), NULL);
481
482    SmallString<2048> DescriptionOfGlobal = G->getName();
483    DescriptionOfGlobal += " (";
484    DescriptionOfGlobal += M.getModuleIdentifier();
485    DescriptionOfGlobal += ")";
486    GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
487
488    // Create a new global variable with enough space for a redzone.
489    GlobalVariable *NewGlobal = new GlobalVariable(
490        M, NewTy, G->isConstant(), G->getLinkage(),
491        NewInitializer, "", G, G->isThreadLocal());
492    NewGlobal->copyAttributesFrom(G);
493    NewGlobal->setAlignment(RedzoneSize);
494
495    Value *Indices2[2];
496    Indices2[0] = IRB.getInt32(0);
497    Indices2[1] = IRB.getInt32(0);
498
499    G->replaceAllUsesWith(
500        ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
501    NewGlobal->takeName(G);
502    G->eraseFromParent();
503
504    Initializers[i] = ConstantStruct::get(
505        GlobalStructTy,
506        ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
507        ConstantInt::get(IntptrTy, SizeInBytes),
508        ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
509        ConstantExpr::getPointerCast(Name, IntptrTy),
510        NULL);
511    DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
512  }
513
514  ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
515  GlobalVariable *AllGlobals = new GlobalVariable(
516      M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
517      ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
518
519  Function *AsanRegisterGlobals = cast<Function>(M.getOrInsertFunction(
520      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
521  AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
522
523  IRB.CreateCall2(AsanRegisterGlobals,
524                  IRB.CreatePointerCast(AllGlobals, IntptrTy),
525                  ConstantInt::get(IntptrTy, n));
526
527  // We also need to unregister globals at the end, e.g. when a shared library
528  // gets closed.
529  Function *AsanDtorFunction = Function::Create(
530      FunctionType::get(Type::getVoidTy(*C), false),
531      GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
532  BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
533  IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
534  Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction(
535      kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
536  AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
537
538  IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
539                       IRB.CreatePointerCast(AllGlobals, IntptrTy),
540                       ConstantInt::get(IntptrTy, n));
541  appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
542
543  DEBUG(dbgs() << M);
544  return true;
545}
546
547// virtual
548bool AddressSanitizer::runOnModule(Module &M) {
549  // Initialize the private fields. No one has accessed them before.
550  TD = getAnalysisIfAvailable<TargetData>();
551  if (!TD)
552    return false;
553  BL.reset(new BlackList(ClBlackListFile));
554
555  CurrentModule = &M;
556  C = &(M.getContext());
557  LongSize = TD->getPointerSizeInBits();
558  IntptrTy = Type::getIntNTy(*C, LongSize);
559  IntptrPtrTy = PointerType::get(IntptrTy, 0);
560
561  AsanCtorFunction = Function::Create(
562      FunctionType::get(Type::getVoidTy(*C), false),
563      GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
564  BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
565  CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB);
566
567  // call __asan_init in the module ctor.
568  IRBuilder<> IRB(CtorInsertBefore);
569  AsanInitFunction = cast<Function>(
570      M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
571  AsanInitFunction->setLinkage(Function::ExternalLinkage);
572  IRB.CreateCall(AsanInitFunction);
573
574  MappingOffset = LongSize == 32
575      ? kDefaultShadowOffset32 : kDefaultShadowOffset64;
576  if (ClMappingOffsetLog >= 0) {
577    if (ClMappingOffsetLog == 0) {
578      // special case
579      MappingOffset = 0;
580    } else {
581      MappingOffset = 1ULL << ClMappingOffsetLog;
582    }
583  }
584  MappingScale = kDefaultShadowScale;
585  if (ClMappingScale) {
586    MappingScale = ClMappingScale;
587  }
588  // Redzone used for stack and globals is at least 32 bytes.
589  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
590  RedzoneSize = std::max(32, (int)(1 << MappingScale));
591
592  bool Res = false;
593
594  if (ClGlobals)
595    Res |= insertGlobalRedzones(M);
596
597  // Tell the run-time the current values of mapping offset and scale.
598  GlobalValue *asan_mapping_offset =
599      new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
600                     ConstantInt::get(IntptrTy, MappingOffset),
601                     kAsanMappingOffsetName);
602  GlobalValue *asan_mapping_scale =
603      new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
604                         ConstantInt::get(IntptrTy, MappingScale),
605                         kAsanMappingScaleName);
606  // Read these globals, otherwise they may be optimized away.
607  IRB.CreateLoad(asan_mapping_scale, true);
608  IRB.CreateLoad(asan_mapping_offset, true);
609
610
611  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
612    if (F->isDeclaration()) continue;
613    Res |= handleFunction(M, *F);
614  }
615
616  appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
617
618  return Res;
619}
620
621bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
622  // For each NSObject descendant having a +load method, this method is invoked
623  // by the ObjC runtime before any of the static constructors is called.
624  // Therefore we need to instrument such methods with a call to __asan_init
625  // at the beginning in order to initialize our runtime before any access to
626  // the shadow memory.
627  // We cannot just ignore these methods, because they may call other
628  // instrumented functions.
629  if (F.getName().find(" load]") != std::string::npos) {
630    IRBuilder<> IRB(F.begin()->begin());
631    IRB.CreateCall(AsanInitFunction);
632    return true;
633  }
634  return false;
635}
636
637bool AddressSanitizer::handleFunction(Module &M, Function &F) {
638  if (BL->isIn(F)) return false;
639  if (&F == AsanCtorFunction) return false;
640
641  // If needed, insert __asan_init before checking for AddressSafety attr.
642  maybeInsertAsanInitAtFunctionEntry(F);
643
644  if (!F.hasFnAttr(Attribute::AddressSafety)) return false;
645
646  if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
647    return false;
648  // We want to instrument every address only once per basic block
649  // (unless there are calls between uses).
650  SmallSet<Value*, 16> TempsToInstrument;
651  SmallVector<Instruction*, 16> ToInstrument;
652
653  // Fill the set of memory operations to instrument.
654  for (Function::iterator FI = F.begin(), FE = F.end();
655       FI != FE; ++FI) {
656    TempsToInstrument.clear();
657    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
658         BI != BE; ++BI) {
659      if (LooksLikeCodeInBug11395(BI)) return false;
660      if ((isa<LoadInst>(BI) && ClInstrumentReads) ||
661          (isa<StoreInst>(BI) && ClInstrumentWrites)) {
662        Value *Addr = getLDSTOperand(BI);
663        if (ClOpt && ClOptSameTemp) {
664          if (!TempsToInstrument.insert(Addr))
665            continue;  // We've seen this temp in the current BB.
666        }
667      } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
668        // ok, take it.
669      } else {
670        if (isa<CallInst>(BI)) {
671          // A call inside BB.
672          TempsToInstrument.clear();
673        }
674        continue;
675      }
676      ToInstrument.push_back(BI);
677    }
678  }
679
680  // Instrument.
681  int NumInstrumented = 0;
682  for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
683    Instruction *Inst = ToInstrument[i];
684    if (ClDebugMin < 0 || ClDebugMax < 0 ||
685        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
686      if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst))
687        instrumentMop(Inst);
688      else
689        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
690    }
691    NumInstrumented++;
692  }
693
694  DEBUG(dbgs() << F);
695
696  bool ChangedStack = poisonStackInFunction(M, F);
697  return NumInstrumented > 0 || ChangedStack;
698}
699
// Replicates PoisonByte into every byte of a ShadowRedzoneSize-byte integer.
// ShadowRedzoneSize must be 1, 2 or 4; any other size trips the assertion
// (and yields 0 when assertions are compiled out).
static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
  switch (ShadowRedzoneSize) {
  case 1:
    return PoisonByte;
  case 2:
    // Multiplying by 0x0101 is the same as (B << 8) + B.
    return PoisonByte * 0x0101ULL;
  case 4:
    // Same idea: (B << 24) + (B << 16) + (B << 8) + B.
    return PoisonByte * 0x01010101ULL;
  default:
    break;
  }
  assert(0 && "ShadowRedzoneSize is either 1, 2 or 4");
  return 0;
}
709
// Fills the shadow bytes that describe one redzone-sized chunk of memory of
// which only the first Size bytes are addressable. One shadow byte is written
// per ShadowGranularity bytes of the chunk:
//   0         - the whole granule is addressable,
//   k         - only the first k bytes of the granule are addressable,
//   Magic     - nothing in the granule is addressable.
// Shadow must have room for one byte per granule of the chunk.
static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
                                            size_t Size,
                                            size_t RedzoneSize,
                                            size_t ShadowGranularity,
                                            uint8_t Magic) {
  size_t Index = 0;
  for (size_t Offset = 0; Offset < RedzoneSize;
       Offset += ShadowGranularity, ++Index) {
    uint8_t ShadowByte;
    if (Offset + ShadowGranularity <= Size)
      ShadowByte = 0;  // fully addressable
    else if (Offset >= Size)
      ShadowByte = Magic;  // unaddressable
    else
      ShadowByte = static_cast<uint8_t>(Size - Offset);  // partially addressable
    Shadow[Index] = ShadowByte;
  }
}
726
727void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
728                                   IRBuilder<> IRB,
729                                   Value *ShadowBase, bool DoPoison) {
730  size_t ShadowRZSize = RedzoneSize >> MappingScale;
731  assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
732  Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
733  Type *RZPtrTy = PointerType::get(RZTy, 0);
734
735  Value *PoisonLeft  = ConstantInt::get(RZTy,
736    ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize));
737  Value *PoisonMid   = ConstantInt::get(RZTy,
738    ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize));
739  Value *PoisonRight = ConstantInt::get(RZTy,
740    ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize));
741
742  // poison the first red zone.
743  IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
744
745  // poison all other red zones.
746  uint64_t Pos = RedzoneSize;
747  for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
748    AllocaInst *AI = AllocaVec[i];
749    uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
750    uint64_t AlignedSize = getAlignedAllocaSize(AI);
751    assert(AlignedSize - SizeInBytes < RedzoneSize);
752    Value *Ptr = NULL;
753
754    Pos += AlignedSize;
755
756    assert(ShadowBase->getType() == IntptrTy);
757    if (SizeInBytes < AlignedSize) {
758      // Poison the partial redzone at right
759      Ptr = IRB.CreateAdd(
760          ShadowBase, ConstantInt::get(IntptrTy,
761                                       (Pos >> MappingScale) - ShadowRZSize));
762      size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes);
763      uint32_t Poison = 0;
764      if (DoPoison) {
765        PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
766                                        RedzoneSize,
767                                        1ULL << MappingScale,
768                                        kAsanStackPartialRedzoneMagic);
769      }
770      Value *PartialPoison = ConstantInt::get(RZTy, Poison);
771      IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
772    }
773
774    // Poison the full redzone at right.
775    Ptr = IRB.CreateAdd(ShadowBase,
776                        ConstantInt::get(IntptrTy, Pos >> MappingScale));
777    Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid;
778    IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
779
780    Pos += RedzoneSize;
781  }
782}
783
784// Workaround for bug 11395: we don't want to instrument stack in functions
785// with large assembly blobs (32-bit only), otherwise reg alloc may crash.
786// FIXME: remove once the bug 11395 is fixed.
787bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
788  if (LongSize != 32) return false;
789  CallInst *CI = dyn_cast<CallInst>(I);
790  if (!CI || !CI->isInlineAsm()) return false;
791  if (CI->getNumArgOperands() <= 5) return false;
792  // We have inline assembly with quite a few arguments.
793  return true;
794}
795
796// Find all static Alloca instructions and put
797// poisoned red zones around all of them.
798// Then unpoison everything back before the function returns.
799//
800// Stack poisoning does not play well with exception handling.
801// When an exception is thrown, we essentially bypass the code
802// that unpoisones the stack. This is why the run-time library has
803// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
804// stack in the interceptor. This however does not work inside the
805// actual function which catches the exception. Most likely because the
806// compiler hoists the load of the shadow value somewhere too high.
807// This causes asan to report a non-existing bug on 453.povray.
808// It sounds like an LLVM bug.
809bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
810  if (!ClStack) return false;
811  SmallVector<AllocaInst*, 16> AllocaVec;
812  SmallVector<Instruction*, 8> RetVec;
813  uint64_t TotalSize = 0;
814
815  // Filter out Alloca instructions we want (and can) handle.
816  // Collect Ret instructions.
817  for (Function::iterator FI = F.begin(), FE = F.end();
818       FI != FE; ++FI) {
819    BasicBlock &BB = *FI;
820    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
821         BI != BE; ++BI) {
822      if (isa<ReturnInst>(BI)) {
823          RetVec.push_back(BI);
824          continue;
825      }
826
827      AllocaInst *AI = dyn_cast<AllocaInst>(BI);
828      if (!AI) continue;
829      if (AI->isArrayAllocation()) continue;
830      if (!AI->isStaticAlloca()) continue;
831      if (!AI->getAllocatedType()->isSized()) continue;
832      if (AI->getAlignment() > RedzoneSize) continue;
833      AllocaVec.push_back(AI);
834      uint64_t AlignedSize =  getAlignedAllocaSize(AI);
835      TotalSize += AlignedSize;
836    }
837  }
838
839  if (AllocaVec.empty()) return false;
840
841  uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize;
842
843  bool DoStackMalloc = ClUseAfterReturn
844      && LocalStackSize <= kMaxStackMallocSize;
845
846  Instruction *InsBefore = AllocaVec[0];
847  IRBuilder<> IRB(InsBefore);
848
849
850  Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
851  AllocaInst *MyAlloca =
852      new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
853  MyAlloca->setAlignment(RedzoneSize);
854  assert(MyAlloca->isStaticAlloca());
855  Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
856  Value *LocalStackBase = OrigStackBase;
857
858  if (DoStackMalloc) {
859    Value *AsanStackMallocFunc = M.getOrInsertFunction(
860        kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
861    LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
862        ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
863  }
864
865  // This string will be parsed by the run-time (DescribeStackAddress).
866  SmallString<2048> StackDescriptionStorage;
867  raw_svector_ostream StackDescription(StackDescriptionStorage);
868  StackDescription << F.getName() << " " << AllocaVec.size() << " ";
869
870  uint64_t Pos = RedzoneSize;
871  // Replace Alloca instructions with base+offset.
872  for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
873    AllocaInst *AI = AllocaVec[i];
874    uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
875    StringRef Name = AI->getName();
876    StackDescription << Pos << " " << SizeInBytes << " "
877                     << Name.size() << " " << Name << " ";
878    uint64_t AlignedSize = getAlignedAllocaSize(AI);
879    assert((AlignedSize % RedzoneSize) == 0);
880    AI->replaceAllUsesWith(
881        IRB.CreateIntToPtr(
882            IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
883            AI->getType()));
884    Pos += AlignedSize + RedzoneSize;
885  }
886  assert(Pos == LocalStackSize);
887
888  // Write the Magic value and the frame description constant to the redzone.
889  Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
890  IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
891                  BasePlus0);
892  Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
893                                   ConstantInt::get(IntptrTy, LongSize/8));
894  BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
895  Value *Description = IRB.CreatePointerCast(
896      createPrivateGlobalForString(M, StackDescription.str()),
897      IntptrTy);
898  IRB.CreateStore(Description, BasePlus1);
899
900  // Poison the stack redzones at the entry.
901  Value *ShadowBase = memToShadow(LocalStackBase, IRB);
902  PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
903
904  Value *AsanStackFreeFunc = NULL;
905  if (DoStackMalloc) {
906    AsanStackFreeFunc = M.getOrInsertFunction(
907        kAsanStackFreeName, IRB.getVoidTy(),
908        IntptrTy, IntptrTy, IntptrTy, NULL);
909  }
910
911  // Unpoison the stack before all ret instructions.
912  for (size_t i = 0, n = RetVec.size(); i < n; i++) {
913    Instruction *Ret = RetVec[i];
914    IRBuilder<> IRBRet(Ret);
915
916    // Mark the current frame as retired.
917    IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
918                       BasePlus0);
919    // Unpoison the stack.
920    PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false);
921
922    if (DoStackMalloc) {
923      IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
924                         ConstantInt::get(IntptrTy, LocalStackSize),
925                         OrigStackBase);
926    }
927  }
928
929  if (ClDebugStack) {
930    DEBUG(dbgs() << F);
931  }
932
933  return true;
934}
935
936BlackList::BlackList(const std::string &Path) {
937  Functions = NULL;
938  const char *kFunPrefix = "fun:";
939  if (!ClBlackListFile.size()) return;
940  std::string Fun;
941
942  OwningPtr<MemoryBuffer> File;
943  if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) {
944    report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " +
945                       EC.message());
946  }
947  MemoryBuffer *Buff = File.take();
948  const char *Data = Buff->getBufferStart();
949  size_t DataLen = Buff->getBufferSize();
950  SmallVector<StringRef, 16> Lines;
951  SplitString(StringRef(Data, DataLen), Lines, "\n\r");
952  for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
953    if (Lines[i].startswith(kFunPrefix)) {
954      std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
955      std::string ThisFuncRE;
956      // add ThisFunc replacing * with .*
957      for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
958        if (ThisFunc[j] == '*')
959          ThisFuncRE += '.';
960        ThisFuncRE += ThisFunc[j];
961      }
962      // Check that the regexp is valid.
963      Regex CheckRE(ThisFuncRE);
964      std::string Error;
965      if (!CheckRE.isValid(Error))
966        report_fatal_error("malformed blacklist regex: " + ThisFunc +
967                           ": " + Error);
968      // Append to the final regexp.
969      if (Fun.size())
970        Fun += "|";
971      Fun += ThisFuncRE;
972    }
973  }
974  if (Fun.size()) {
975    Functions = new Regex(Fun);
976  }
977}
978
979bool BlackList::isIn(const Function &F) {
980  if (Functions) {
981    bool Res = Functions->match(F.getName());
982    return Res;
983  }
984  return false;
985}
986