MemorySanitizer.cpp revision 2aac38541708f37f9ddc5b2d3047b68835484a23
1//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file is a part of MemorySanitizer, a detector of uninitialized
11/// reads.
12///
13/// Status: early prototype.
14///
15/// The algorithm of the tool is similar to Memcheck
16/// (http://goo.gl/QKbem). We associate a few shadow bits with every
17/// byte of the application memory, poison the shadow of the malloc-ed
18/// or alloca-ed memory, load the shadow bits on every memory read,
19/// propagate the shadow bits through some of the arithmetic
20/// instruction (including MOV), store the shadow bits on every memory
21/// write, report a bug on some other instructions (e.g. JMP) if the
22/// associated shadow is poisoned.
23///
24/// But there are differences too. The first and the major one:
25/// compiler instrumentation instead of binary instrumentation. This
26/// gives us much better register allocation, possible compiler
27/// optimizations and a fast start-up. But this brings the major issue
28/// as well: msan needs to see all program events, including system
29/// calls and reads/writes in system libraries, so we either need to
30/// compile *everything* with msan or use a binary translation
31/// component (e.g. DynamoRIO) to instrument pre-built libraries.
32/// Another difference from Memcheck is that we use 8 shadow bits per
33/// byte of application memory and use a direct shadow mapping. This
34/// greatly simplifies the instrumentation code and avoids races on
35/// shadow updates (Memcheck is single-threaded so races are not a
36/// concern there. Memcheck uses 2 shadow bits per byte with a slow
37/// path storage that uses 8 bits per byte).
38///
39/// The default value of shadow is 0, which means "clean" (not poisoned).
40///
41/// Every module initializer should call __msan_init to ensure that the
42/// shadow memory is ready. On error, __msan_warning is called. Since
43/// parameters and return values may be passed via registers, we have a
44/// specialized thread-local shadow for return values
45/// (__msan_retval_tls) and parameters (__msan_param_tls).
46//===----------------------------------------------------------------------===//
47
48#define DEBUG_TYPE "msan"
49
50#include "BlackList.h"
51#include "llvm/DataLayout.h"
52#include "llvm/Function.h"
53#include "llvm/InlineAsm.h"
54#include "llvm/IntrinsicInst.h"
55#include "llvm/IRBuilder.h"
56#include "llvm/LLVMContext.h"
57#include "llvm/MDBuilder.h"
58#include "llvm/Module.h"
59#include "llvm/Type.h"
60#include "llvm/ADT/DepthFirstIterator.h"
61#include "llvm/ADT/SmallString.h"
62#include "llvm/ADT/SmallVector.h"
63#include "llvm/ADT/ValueMap.h"
64#include "llvm/Transforms/Instrumentation.h"
65#include "llvm/Transforms/Utils/BasicBlockUtils.h"
66#include "llvm/Transforms/Utils/ModuleUtils.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/InstVisitor.h"
71#include "llvm/Support/raw_ostream.h"
72#include "llvm/Transforms/Instrumentation.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/ModuleUtils.h"
75
76using namespace llvm;
77
78static const uint64_t kShadowMask32 = 1ULL << 31;
79static const uint64_t kShadowMask64 = 1ULL << 46;
80static const uint64_t kOriginOffset32 = 1ULL << 30;
81static const uint64_t kOriginOffset64 = 1ULL << 45;
82
83// This is an important flag that makes the reports much more
84// informative at the cost of greater slowdown. Not fully implemented
85// yet.
86// FIXME: this should be a top-level clang flag, e.g.
87// -fmemory-sanitizer-full.
88static cl::opt<bool> ClTrackOrigins("msan-track-origins",
89       cl::desc("Track origins (allocation sites) of poisoned memory"),
90       cl::Hidden, cl::init(false));
91static cl::opt<bool> ClKeepGoing("msan-keep-going",
92       cl::desc("keep going after reporting a UMR"),
93       cl::Hidden, cl::init(false));
94static cl::opt<bool> ClPoisonStack("msan-poison-stack",
95       cl::desc("poison uninitialized stack variables"),
96       cl::Hidden, cl::init(true));
97static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
98       cl::desc("poison uninitialized stack variables with a call"),
99       cl::Hidden, cl::init(false));
100static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
101       cl::desc("poison uninitialized stack variables with the given patter"),
102       cl::Hidden, cl::init(0xff));
103
104static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
105       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
106       cl::Hidden, cl::init(true));
107
108// This flag controls whether we check the shadow of the address
109// operand of load or store. Such bugs are very rare, since load from
110// a garbage address typically results in SEGV, but still happen
111// (e.g. only lower bits of address are garbage, or the access happens
112// early at program startup where malloc-ed memory is more likely to
113// be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
114static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
115       cl::desc("report accesses through a pointer which has poisoned shadow"),
116       cl::Hidden, cl::init(true));
117
118static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
119       cl::desc("print out instructions with default strict semantics"),
120       cl::Hidden, cl::init(false));
121
122static cl::opt<std::string>  ClBlackListFile("msan-blacklist",
123       cl::desc("File containing the list of functions where MemorySanitizer "
124                "should not report bugs"), cl::Hidden);
125
126namespace {
127
128/// \brief An instrumentation pass implementing detection of uninitialized
129/// reads.
130///
131/// MemorySanitizer: instrument the code in module to find
132/// uninitialized reads.
133class MemorySanitizer : public FunctionPass {
134public:
135  MemorySanitizer() : FunctionPass(ID), TD(0) { }
136  const char *getPassName() const { return "MemorySanitizer"; }
137  bool runOnFunction(Function &F);
138  bool doInitialization(Module &M);
139  static char ID;  // Pass identification, replacement for typeid.
140
141private:
142  DataLayout *TD;
143  LLVMContext *C;
144  Type *IntptrTy;
145  Type *OriginTy;
146  /// \brief Thread-local shadow storage for function parameters.
147  GlobalVariable *ParamTLS;
148  /// \brief Thread-local origin storage for function parameters.
149  GlobalVariable *ParamOriginTLS;
150  /// \brief Thread-local shadow storage for function return value.
151  GlobalVariable *RetvalTLS;
152  /// \brief Thread-local origin storage for function return value.
153  GlobalVariable *RetvalOriginTLS;
154  /// \brief Thread-local shadow storage for in-register va_arg function
155  /// parameters (x86_64-specific).
156  GlobalVariable *VAArgTLS;
157  /// \brief Thread-local shadow storage for va_arg overflow area
158  /// (x86_64-specific).
159  GlobalVariable *VAArgOverflowSizeTLS;
160  /// \brief Thread-local space used to pass origin value to the UMR reporting
161  /// function.
162  GlobalVariable *OriginTLS;
163
164  /// \brief The run-time callback to print a warning.
165  Value *WarningFn;
166  /// \brief Run-time helper that copies origin info for a memory range.
167  Value *MsanCopyOriginFn;
168  /// \brief Run-time helper that generates a new origin value for a stack
169  /// allocation.
170  Value *MsanSetAllocaOriginFn;
171  /// \brief Run-time helper that poisons stack on function entry.
172  Value *MsanPoisonStackFn;
173  /// \brief MSan runtime replacements for memmove, memcpy and memset.
174  Value *MemmoveFn, *MemcpyFn, *MemsetFn;
175
176  /// \brief Address mask used in application-to-shadow address calculation.
177  /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask.
178  uint64_t ShadowMask;
179  /// \brief Offset of the origin shadow from the "normal" shadow.
180  /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL
181  uint64_t OriginOffset;
182  /// \brief Branch weights for error reporting.
183  MDNode *ColdCallWeights;
184  /// \brief The blacklist.
185  OwningPtr<BlackList> BL;
186  /// \brief An empty volatile inline asm that prevents callback merge.
187  InlineAsm *EmptyAsm;
188
189  friend struct MemorySanitizerVisitor;
190  friend struct VarArgAMD64Helper;
191};
192}  // namespace
193
194char MemorySanitizer::ID = 0;
195INITIALIZE_PASS(MemorySanitizer, "msan",
196                "MemorySanitizer: detects uninitialized reads.",
197                false, false)
198
199FunctionPass *llvm::createMemorySanitizerPass() {
200  return new MemorySanitizer();
201}
202
203/// \brief Create a non-const global initialized with the given string.
204///
205/// Creates a writable global for Str so that we can pass it to the
206/// run-time lib. Runtime uses first 4 bytes of the string to store the
207/// frame ID, so the string needs to be mutable.
208static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
209                                                            StringRef Str) {
210  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
211  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
212                            GlobalValue::PrivateLinkage, StrConst, "");
213}
214
215/// \brief Module-level initialization.
216///
217/// Obtains pointers to the required runtime library functions, and
218/// inserts a call to __msan_init to the module's constructor list.
219bool MemorySanitizer::doInitialization(Module &M) {
220  TD = getAnalysisIfAvailable<DataLayout>();
221  if (!TD)
222    return false;
223  BL.reset(new BlackList(ClBlackListFile));
224  C = &(M.getContext());
225  unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
226  switch (PtrSize) {
227    case 64:
228      ShadowMask = kShadowMask64;
229      OriginOffset = kOriginOffset64;
230      break;
231    case 32:
232      ShadowMask = kShadowMask32;
233      OriginOffset = kOriginOffset32;
234      break;
235    default:
236      report_fatal_error("unsupported pointer size");
237      break;
238  }
239
240  IRBuilder<> IRB(*C);
241  IntptrTy = IRB.getIntPtrTy(TD);
242  OriginTy = IRB.getInt32Ty();
243
244  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
245
246  // Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
247  appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
248                      "__msan_init", IRB.getVoidTy(), NULL)), 0);
249
250  new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::LinkOnceODRLinkage,
251                     IRB.getInt32(ClTrackOrigins), "__msan_track_origins");
252
253  // Create the callback.
254  // FIXME: this function should have "Cold" calling conv,
255  // which is not yet implemented.
256  StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
257                                        : "__msan_warning_noreturn";
258  WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
259
260  MsanCopyOriginFn = M.getOrInsertFunction(
261    "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
262    IRB.getInt8PtrTy(), IntptrTy, NULL);
263  MsanSetAllocaOriginFn = M.getOrInsertFunction(
264    "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
265    IRB.getInt8PtrTy(), NULL);
266  MsanPoisonStackFn = M.getOrInsertFunction(
267    "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
268  MemmoveFn = M.getOrInsertFunction(
269    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
270    IntptrTy, NULL);
271  MemcpyFn = M.getOrInsertFunction(
272    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
273    IntptrTy, NULL);
274  MemsetFn = M.getOrInsertFunction(
275    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
276    IntptrTy, NULL);
277
278  // Create globals.
279  RetvalTLS = new GlobalVariable(
280    M, ArrayType::get(IRB.getInt64Ty(), 8), false,
281    GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
282    GlobalVariable::GeneralDynamicTLSModel);
283  RetvalOriginTLS = new GlobalVariable(
284    M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
285    "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
286
287  ParamTLS = new GlobalVariable(
288    M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
289    GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
290    GlobalVariable::GeneralDynamicTLSModel);
291  ParamOriginTLS = new GlobalVariable(
292    M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
293    0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
294
295  VAArgTLS = new GlobalVariable(
296    M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
297    GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
298    GlobalVariable::GeneralDynamicTLSModel);
299  VAArgOverflowSizeTLS = new GlobalVariable(
300    M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
301    "__msan_va_arg_overflow_size_tls", 0,
302    GlobalVariable::GeneralDynamicTLSModel);
303  OriginTLS = new GlobalVariable(
304    M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
305    "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
306
307  // We insert an empty inline asm after __msan_report* to avoid callback merge.
308  EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
309                            StringRef(""), StringRef(""),
310                            /*hasSideEffects=*/true);
311  return true;
312}
313
314namespace {
315
316/// \brief A helper class that handles instrumentation of VarArg
317/// functions on a particular platform.
318///
319/// Implementations are expected to insert the instrumentation
320/// necessary to propagate argument shadow through VarArg function
321/// calls. Visit* methods are called during an InstVisitor pass over
322/// the function, and should avoid creating new basic blocks. A new
323/// instance of this class is created for each instrumented function.
324struct VarArgHelper {
325  /// \brief Visit a CallSite.
326  virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;
327
328  /// \brief Visit a va_start call.
329  virtual void visitVAStartInst(VAStartInst &I) = 0;
330
331  /// \brief Visit a va_copy call.
332  virtual void visitVACopyInst(VACopyInst &I) = 0;
333
334  /// \brief Finalize function instrumentation.
335  ///
336  /// This method is called after visiting all interesting (see above)
337  /// instructions in a function.
338  virtual void finalizeInstrumentation() = 0;
339
340  virtual ~VarArgHelper() {}
341};
342
343struct MemorySanitizerVisitor;
344
345VarArgHelper*
346CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
347                   MemorySanitizerVisitor &Visitor);
348
349/// This class does all the work for a given function. Store and Load
350/// instructions store and load corresponding shadow and origin
351/// values. Most instructions propagate shadow from arguments to their
352/// return values. Certain instructions (most importantly, BranchInst)
353/// test their argument shadow and print reports (with a runtime call) if it's
354/// non-zero.
355struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
356  Function &F;
357  MemorySanitizer &MS;
358  SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
359  ValueMap<Value*, Value*> ShadowMap, OriginMap;
360  bool InsertChecks;
361  OwningPtr<VarArgHelper> VAHelper;
362
363  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
364  // See a comment in visitCallSite for more details.
365  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
366  static const unsigned AMD64FpEndOffset = 176;
367
368  struct ShadowOriginAndInsertPoint {
369    Instruction *Shadow;
370    Instruction *Origin;
371    Instruction *OrigIns;
372    ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
373      : Shadow(S), Origin(O), OrigIns(I) { }
374    ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
375  };
376  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
377
378  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
379    : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
380    InsertChecks = !MS.BL->isIn(F);
381    DEBUG(if (!InsertChecks)
382            dbgs() << "MemorySanitizer is not inserting checks into '"
383                   << F.getName() << "'\n");
384  }
385
386  void materializeChecks() {
387    for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
388      Instruction *Shadow = InstrumentationList[i].Shadow;
389      Instruction *OrigIns = InstrumentationList[i].OrigIns;
390      IRBuilder<> IRB(OrigIns);
391      DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
392      Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
393      DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
394      Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
395                                    getCleanShadow(ConvertedShadow), "_mscmp");
396      Instruction *CheckTerm =
397        SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
398                                  /* Unreachable */ !ClKeepGoing,
399                                  MS.ColdCallWeights);
400
401      IRB.SetInsertPoint(CheckTerm);
402      if (ClTrackOrigins) {
403        Instruction *Origin = InstrumentationList[i].Origin;
404        IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
405                        MS.OriginTLS);
406      }
407      CallInst *Call = IRB.CreateCall(MS.WarningFn);
408      Call->setDebugLoc(OrigIns->getDebugLoc());
409      IRB.CreateCall(MS.EmptyAsm);
410      DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
411    }
412    DEBUG(dbgs() << "DONE:\n" << F);
413  }
414
415  /// \brief Add MemorySanitizer instrumentation to a function.
416  bool runOnFunction() {
417    if (!MS.TD) return false;
418    // Iterate all BBs in depth-first order and create shadow instructions
419    // for all instructions (where applicable).
420    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
421    for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
422         DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
423      BasicBlock *BB = *DI;
424      visit(*BB);
425    }
426
427    // Finalize PHI nodes.
428    for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
429      PHINode *PN = ShadowPHINodes[i];
430      PHINode *PNS = cast<PHINode>(getShadow(PN));
431      PHINode *PNO = ClTrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
432      size_t NumValues = PN->getNumIncomingValues();
433      for (size_t v = 0; v < NumValues; v++) {
434        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
435        if (PNO)
436          PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
437      }
438    }
439
440    VAHelper->finalizeInstrumentation();
441
442    materializeChecks();
443
444    return true;
445  }
446
447  /// \brief Compute the shadow type that corresponds to a given Value.
448  Type *getShadowTy(Value *V) {
449    return getShadowTy(V->getType());
450  }
451
452  /// \brief Compute the shadow type that corresponds to a given Type.
453  Type *getShadowTy(Type *OrigTy) {
454    if (!OrigTy->isSized()) {
455      return 0;
456    }
457    // For integer type, shadow is the same as the original type.
458    // This may return weird-sized types like i1.
459    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
460      return IT;
461    if (VectorType *VT = dyn_cast<VectorType>(OrigTy))
462      return VectorType::getInteger(VT);
463    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
464      SmallVector<Type*, 4> Elements;
465      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
466        Elements.push_back(getShadowTy(ST->getElementType(i)));
467      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
468      DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
469      return Res;
470    }
471    uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy);
472    return IntegerType::get(*MS.C, TypeSize);
473  }
474
475  /// \brief Flatten a vector type.
476  Type *getShadowTyNoVec(Type *ty) {
477    if (VectorType *vt = dyn_cast<VectorType>(ty))
478      return IntegerType::get(*MS.C, vt->getBitWidth());
479    return ty;
480  }
481
482  /// \brief Convert a shadow value to it's flattened variant.
483  Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
484    Type *Ty = V->getType();
485    Type *NoVecTy = getShadowTyNoVec(Ty);
486    if (Ty == NoVecTy) return V;
487    return IRB.CreateBitCast(V, NoVecTy);
488  }
489
490  /// \brief Compute the shadow address that corresponds to a given application
491  /// address.
492  ///
493  /// Shadow = Addr & ~ShadowMask.
494  Value *getShadowPtr(Value *Addr, Type *ShadowTy,
495                      IRBuilder<> &IRB) {
496    Value *ShadowLong =
497      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
498                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
499    return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
500  }
501
502  /// \brief Compute the origin address that corresponds to a given application
503  /// address.
504  ///
505  /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL
506  Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) {
507    Value *ShadowLong =
508      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
509                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
510    Value *Add =
511      IRB.CreateAdd(ShadowLong,
512                    ConstantInt::get(MS.IntptrTy, MS.OriginOffset));
513    Value *SecondAnd =
514      IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL));
515    return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0));
516  }
517
518  /// \brief Compute the shadow address for a given function argument.
519  ///
520  /// Shadow = ParamTLS+ArgOffset.
521  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
522                                 int ArgOffset) {
523    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
524    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
525    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
526                              "_msarg");
527  }
528
529  /// \brief Compute the origin address for a given function argument.
530  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
531                                 int ArgOffset) {
532    if (!ClTrackOrigins) return 0;
533    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
534    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
535    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
536                              "_msarg_o");
537  }
538
539  /// \brief Compute the shadow address for a retval.
540  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
541    Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy);
542    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
543                              "_msret");
544  }
545
546  /// \brief Compute the origin address for a retval.
547  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
548    // We keep a single origin for the entire retval. Might be too optimistic.
549    return MS.RetvalOriginTLS;
550  }
551
552  /// \brief Set SV to be the shadow value for V.
553  void setShadow(Value *V, Value *SV) {
554    assert(!ShadowMap.count(V) && "Values may only have one shadow");
555    ShadowMap[V] = SV;
556  }
557
558  /// \brief Set Origin to be the origin value for V.
559  void setOrigin(Value *V, Value *Origin) {
560    if (!ClTrackOrigins) return;
561    assert(!OriginMap.count(V) && "Values may only have one origin");
562    DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
563    OriginMap[V] = Origin;
564  }
565
566  /// \brief Create a clean shadow value for a given value.
567  ///
568  /// Clean shadow (all zeroes) means all bits of the value are defined
569  /// (initialized).
570  Value *getCleanShadow(Value *V) {
571    Type *ShadowTy = getShadowTy(V);
572    if (!ShadowTy)
573      return 0;
574    return Constant::getNullValue(ShadowTy);
575  }
576
577  /// \brief Create a dirty shadow of a given shadow type.
578  Constant *getPoisonedShadow(Type *ShadowTy) {
579    assert(ShadowTy);
580    if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
581      return Constant::getAllOnesValue(ShadowTy);
582    StructType *ST = cast<StructType>(ShadowTy);
583    SmallVector<Constant *, 4> Vals;
584    for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
585      Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
586    return ConstantStruct::get(ST, Vals);
587  }
588
589  /// \brief Create a clean (zero) origin.
590  Value *getCleanOrigin() {
591    return Constant::getNullValue(MS.OriginTy);
592  }
593
594  /// \brief Get the shadow value for a given Value.
595  ///
596  /// This function either returns the value set earlier with setShadow,
597  /// or extracts if from ParamTLS (for function arguments).
598  Value *getShadow(Value *V) {
599    if (Instruction *I = dyn_cast<Instruction>(V)) {
600      // For instructions the shadow is already stored in the map.
601      Value *Shadow = ShadowMap[V];
602      if (!Shadow) {
603        DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
604        assert(Shadow && "No shadow for a value");
605      }
606      return Shadow;
607    }
608    if (UndefValue *U = dyn_cast<UndefValue>(V)) {
609      Value *AllOnes = getPoisonedShadow(getShadowTy(V));
610      DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
611      return AllOnes;
612    }
613    if (Argument *A = dyn_cast<Argument>(V)) {
614      // For arguments we compute the shadow on demand and store it in the map.
615      Value **ShadowPtr = &ShadowMap[V];
616      if (*ShadowPtr)
617        return *ShadowPtr;
618      Function *F = A->getParent();
619      IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
620      unsigned ArgOffset = 0;
621      for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
622           AI != AE; ++AI) {
623        if (!AI->getType()->isSized()) {
624          DEBUG(dbgs() << "Arg is not sized\n");
625          continue;
626        }
627        unsigned Size = AI->hasByValAttr()
628          ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType())
629          : MS.TD->getTypeAllocSize(AI->getType());
630        if (A == AI) {
631          Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
632          if (AI->hasByValAttr()) {
633            // ByVal pointer itself has clean shadow. We copy the actual
634            // argument shadow to the underlying memory.
635            Value *Cpy = EntryIRB.CreateMemCpy(
636              getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
637              Base, Size, AI->getParamAlignment());
638            DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
639            *ShadowPtr = getCleanShadow(V);
640          } else {
641            *ShadowPtr = EntryIRB.CreateLoad(Base);
642          }
643          DEBUG(dbgs() << "  ARG:    "  << *AI << " ==> " <<
644                **ShadowPtr << "\n");
645          if (ClTrackOrigins) {
646            Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset);
647            setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
648          }
649        }
650        ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
651      }
652      assert(*ShadowPtr && "Could not find shadow for an argument");
653      return *ShadowPtr;
654    }
655    // For everything else the shadow is zero.
656    return getCleanShadow(V);
657  }
658
659  /// \brief Get the shadow for i-th argument of the instruction I.
660  Value *getShadow(Instruction *I, int i) {
661    return getShadow(I->getOperand(i));
662  }
663
664  /// \brief Get the origin for a value.
665  Value *getOrigin(Value *V) {
666    if (!ClTrackOrigins) return 0;
667    if (isa<Instruction>(V) || isa<Argument>(V)) {
668      Value *Origin = OriginMap[V];
669      if (!Origin) {
670        DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n");
671        Origin = getCleanOrigin();
672      }
673      return Origin;
674    }
675    return getCleanOrigin();
676  }
677
678  /// \brief Get the origin for i-th argument of the instruction I.
679  Value *getOrigin(Instruction *I, int i) {
680    return getOrigin(I->getOperand(i));
681  }
682
683  /// \brief Remember the place where a shadow check should be inserted.
684  ///
685  /// This location will be later instrumented with a check that will print a
686  /// UMR warning in runtime if the value is not fully defined.
687  void insertCheck(Value *Val, Instruction *OrigIns) {
688    assert(Val);
689    if (!InsertChecks) return;
690    Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
691    if (!Shadow) return;
692    Type *ShadowTy = Shadow->getType();
693    assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
694           "Can only insert checks for integer and vector shadow types");
695    Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
696    InstrumentationList.push_back(
697      ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
698  }
699
700  //------------------- Visitors.
701
702  /// \brief Instrument LoadInst
703  ///
704  /// Loads the corresponding shadow and (optionally) origin.
705  /// Optionally, checks that the load address is fully defined.
706  void visitLoadInst(LoadInst &I) {
707    Type *LoadTy = I.getType();
708    assert(LoadTy->isSized() && "Load type must have size");
709    IRBuilder<> IRB(&I);
710    Type *ShadowTy = getShadowTy(&I);
711    Value *Addr = I.getPointerOperand();
712    Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
713    setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
714
715    if (ClCheckAccessAddress)
716      insertCheck(I.getPointerOperand(), &I);
717
718    if (ClTrackOrigins)
719      setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), I.getAlignment()));
720  }
721
722  /// \brief Instrument StoreInst
723  ///
724  /// Stores the corresponding shadow and (optionally) origin.
725  /// Optionally, checks that the store address is fully defined.
726  /// Volatile stores check that the value being stored is fully defined.
727  void visitStoreInst(StoreInst &I) {
728    IRBuilder<> IRB(&I);
729    Value *Val = I.getValueOperand();
730    Value *Addr = I.getPointerOperand();
731    Value *Shadow = getShadow(Val);
732    Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
733
734    StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
735    DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
736    // If the store is volatile, add a check.
737    if (I.isVolatile())
738      insertCheck(Val, &I);
739    if (ClCheckAccessAddress)
740      insertCheck(Addr, &I);
741
742    if (ClTrackOrigins)
743      IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB), I.getAlignment());
744  }
745
746  // Vector manipulation.
747  void visitExtractElementInst(ExtractElementInst &I) {
748    insertCheck(I.getOperand(1), &I);
749    IRBuilder<> IRB(&I);
750    setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
751              "_msprop"));
752    setOrigin(&I, getOrigin(&I, 0));
753  }
754
755  void visitInsertElementInst(InsertElementInst &I) {
756    insertCheck(I.getOperand(2), &I);
757    IRBuilder<> IRB(&I);
758    setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
759              I.getOperand(2), "_msprop"));
760    setOriginForNaryOp(I);
761  }
762
763  void visitShuffleVectorInst(ShuffleVectorInst &I) {
764    insertCheck(I.getOperand(2), &I);
765    IRBuilder<> IRB(&I);
766    setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
767              I.getOperand(2), "_msprop"));
768    setOriginForNaryOp(I);
769  }
770
771  // Casts.
772  void visitSExtInst(SExtInst &I) {
773    IRBuilder<> IRB(&I);
774    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
775    setOrigin(&I, getOrigin(&I, 0));
776  }
777
778  void visitZExtInst(ZExtInst &I) {
779    IRBuilder<> IRB(&I);
780    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
781    setOrigin(&I, getOrigin(&I, 0));
782  }
783
784  void visitTruncInst(TruncInst &I) {
785    IRBuilder<> IRB(&I);
786    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
787    setOrigin(&I, getOrigin(&I, 0));
788  }
789
790  void visitBitCastInst(BitCastInst &I) {
791    IRBuilder<> IRB(&I);
792    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
793    setOrigin(&I, getOrigin(&I, 0));
794  }
795
796  void visitPtrToIntInst(PtrToIntInst &I) {
797    IRBuilder<> IRB(&I);
798    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
799             "_msprop_ptrtoint"));
800    setOrigin(&I, getOrigin(&I, 0));
801  }
802
803  void visitIntToPtrInst(IntToPtrInst &I) {
804    IRBuilder<> IRB(&I);
805    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
806             "_msprop_inttoptr"));
807    setOrigin(&I, getOrigin(&I, 0));
808  }
809
810  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
811  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
812  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
813  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
814  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
815  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
816
817  /// \brief Propagate shadow for bitwise AND.
818  ///
819  /// This code is exact, i.e. if, for example, a bit in the left argument
820  /// is defined and 0, then neither the value not definedness of the
821  /// corresponding bit in B don't affect the resulting shadow.
822  void visitAnd(BinaryOperator &I) {
823    IRBuilder<> IRB(&I);
824    //  "And" of 0 and a poisoned value results in unpoisoned value.
825    //  1&1 => 1;     0&1 => 0;     p&1 => p;
826    //  1&0 => 0;     0&0 => 0;     p&0 => 0;
827    //  1&p => p;     0&p => 0;     p&p => p;
828    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
829    Value *S1 = getShadow(&I, 0);
830    Value *S2 = getShadow(&I, 1);
831    Value *V1 = I.getOperand(0);
832    Value *V2 = I.getOperand(1);
833    if (V1->getType() != S1->getType()) {
834      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
835      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
836    }
837    Value *S1S2 = IRB.CreateAnd(S1, S2);
838    Value *V1S2 = IRB.CreateAnd(V1, S2);
839    Value *S1V2 = IRB.CreateAnd(S1, V2);
840    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
841    setOriginForNaryOp(I);
842  }
843
844  void visitOr(BinaryOperator &I) {
845    IRBuilder<> IRB(&I);
846    //  "Or" of 1 and a poisoned value results in unpoisoned value.
847    //  1|1 => 1;     0|1 => 1;     p|1 => 1;
848    //  1|0 => 1;     0|0 => 0;     p|0 => p;
849    //  1|p => 1;     0|p => p;     p|p => p;
850    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
851    Value *S1 = getShadow(&I, 0);
852    Value *S2 = getShadow(&I, 1);
853    Value *V1 = IRB.CreateNot(I.getOperand(0));
854    Value *V2 = IRB.CreateNot(I.getOperand(1));
855    if (V1->getType() != S1->getType()) {
856      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
857      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
858    }
859    Value *S1S2 = IRB.CreateAnd(S1, S2);
860    Value *V1S2 = IRB.CreateAnd(V1, S2);
861    Value *S1V2 = IRB.CreateAnd(S1, V2);
862    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
863    setOriginForNaryOp(I);
864  }
865
866  /// \brief Propagate origin for an instruction.
867  ///
868  /// This is a general case of origin propagation. For an Nary operation,
869  /// is set to the origin of an argument that is not entirely initialized.
870  /// If there is more than one such arguments, the rightmost of them is picked.
871  /// It does not matter which one is picked if all arguments are initialized.
872  void setOriginForNaryOp(Instruction &I) {
873    if (!ClTrackOrigins) return;
874    IRBuilder<> IRB(&I);
875    Value *Origin = getOrigin(&I, 0);
876    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) {
877      Value *S = convertToShadowTyNoVec(getShadow(&I, Op), IRB);
878      Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)),
879                                getOrigin(&I, Op), Origin);
880    }
881    setOrigin(&I, Origin);
882  }
883
884  /// \brief Propagate shadow for a binary operation.
885  ///
886  /// Shadow = Shadow0 | Shadow1, all 3 must have the same type.
887  /// Bitwise OR is selected as an operation that will never lose even a bit of
888  /// poison.
889  void handleShadowOrBinary(Instruction &I) {
890    IRBuilder<> IRB(&I);
891    Value *Shadow0 = getShadow(&I, 0);
892    Value *Shadow1 = getShadow(&I, 1);
893    setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop"));
894    setOriginForNaryOp(I);
895  }
896
897  /// \brief Propagate shadow for arbitrary operation.
898  ///
899  /// This is a general case of shadow propagation, used in all cases where we
900  /// don't know and/or care about what the operation actually does.
901  /// It converts all input shadow values to a common type (extending or
902  /// truncating as necessary), and bitwise OR's them.
903  ///
904  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
905  /// fully initialized), and less prone to false positives.
906  // FIXME: is the casting actually correct?
907  // FIXME: merge this with handleShadowOrBinary.
908  void handleShadowOr(Instruction &I) {
909    IRBuilder<> IRB(&I);
910    Value *Shadow = getShadow(&I, 0);
911    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op)
912      Shadow = IRB.CreateOr(
913        Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false),
914        "_msprop");
915    Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false);
916    setShadow(&I, Shadow);
917    setOriginForNaryOp(I);
918  }
919
920  void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
921  void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); }
922  void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); }
923  void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
924  void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); }
925  void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); }
926  void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); }
927
928  void handleDiv(Instruction &I) {
929    IRBuilder<> IRB(&I);
930    // Strict on the second argument.
931    insertCheck(I.getOperand(1), &I);
932    setShadow(&I, getShadow(&I, 0));
933    setOrigin(&I, getOrigin(&I, 0));
934  }
935
936  void visitUDiv(BinaryOperator &I) { handleDiv(I); }
937  void visitSDiv(BinaryOperator &I) { handleDiv(I); }
938  void visitFDiv(BinaryOperator &I) { handleDiv(I); }
939  void visitURem(BinaryOperator &I) { handleDiv(I); }
940  void visitSRem(BinaryOperator &I) { handleDiv(I); }
941  void visitFRem(BinaryOperator &I) { handleDiv(I); }
942
943  /// \brief Instrument == and != comparisons.
944  ///
945  /// Sometimes the comparison result is known even if some of the bits of the
946  /// arguments are not.
947  void handleEqualityComparison(ICmpInst &I) {
948    IRBuilder<> IRB(&I);
949    Value *A = I.getOperand(0);
950    Value *B = I.getOperand(1);
951    Value *Sa = getShadow(A);
952    Value *Sb = getShadow(B);
953    if (A->getType()->isPointerTy())
954      A = IRB.CreatePointerCast(A, MS.IntptrTy);
955    if (B->getType()->isPointerTy())
956      B = IRB.CreatePointerCast(B, MS.IntptrTy);
957    // A == B  <==>  (C = A^B) == 0
958    // A != B  <==>  (C = A^B) != 0
959    // Sc = Sa | Sb
960    Value *C = IRB.CreateXor(A, B);
961    Value *Sc = IRB.CreateOr(Sa, Sb);
962    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
963    // Result is defined if one of the following is true
964    // * there is a defined 1 bit in C
965    // * C is fully defined
966    // Si = !(C & ~Sc) && Sc
967    Value *Zero = Constant::getNullValue(Sc->getType());
968    Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
969    Value *Si =
970      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
971                    IRB.CreateICmpEQ(
972                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
973    Si->setName("_msprop_icmp");
974    setShadow(&I, Si);
975    setOriginForNaryOp(I);
976  }
977
978  /// \brief Instrument signed relational comparisons.
979  ///
980  /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
981  /// propagating the highest bit of the shadow. Everything else is delegated
982  /// to handleShadowOr().
983  void handleSignedRelationalComparison(ICmpInst &I) {
984    Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
985    Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
986    Value* op = NULL;
987    CmpInst::Predicate pre = I.getPredicate();
988    if (constOp0 && constOp0->isNullValue() &&
989        (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
990      op = I.getOperand(1);
991    } else if (constOp1 && constOp1->isNullValue() &&
992               (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
993      op = I.getOperand(0);
994    }
995    if (op) {
996      IRBuilder<> IRB(&I);
997      Value* Shadow =
998        IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
999      setShadow(&I, Shadow);
1000      setOrigin(&I, getOrigin(op));
1001    } else {
1002      handleShadowOr(I);
1003    }
1004  }
1005
1006  void visitICmpInst(ICmpInst &I) {
1007    if (ClHandleICmp && I.isEquality())
1008      handleEqualityComparison(I);
1009    else if (ClHandleICmp && I.isSigned() && I.isRelational())
1010      handleSignedRelationalComparison(I);
1011    else
1012      handleShadowOr(I);
1013  }
1014
1015  void visitFCmpInst(FCmpInst &I) {
1016    handleShadowOr(I);
1017  }
1018
1019  void handleShift(BinaryOperator &I) {
1020    IRBuilder<> IRB(&I);
1021    // If any of the S2 bits are poisoned, the whole thing is poisoned.
1022    // Otherwise perform the same shift on S1.
1023    Value *S1 = getShadow(&I, 0);
1024    Value *S2 = getShadow(&I, 1);
1025    Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
1026                                   S2->getType());
1027    Value *V2 = I.getOperand(1);
1028    Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
1029    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
1030    setOriginForNaryOp(I);
1031  }
1032
1033  void visitShl(BinaryOperator &I) { handleShift(I); }
1034  void visitAShr(BinaryOperator &I) { handleShift(I); }
1035  void visitLShr(BinaryOperator &I) { handleShift(I); }
1036
1037  /// \brief Instrument llvm.memmove
1038  ///
1039  /// At this point we don't know if llvm.memmove will be inlined or not.
1040  /// If we don't instrument it and it gets inlined,
1041  /// our interceptor will not kick in and we will lose the memmove.
1042  /// If we instrument the call here, but it does not get inlined,
1043  /// we will memove the shadow twice: which is bad in case
1044  /// of overlapping regions. So, we simply lower the intrinsic to a call.
1045  ///
1046  /// Similar situation exists for memcpy and memset.
1047  void visitMemMoveInst(MemMoveInst &I) {
1048    IRBuilder<> IRB(&I);
1049    IRB.CreateCall3(
1050      MS.MemmoveFn,
1051      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
1052      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
1053      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
1054    I.eraseFromParent();
1055  }
1056
1057  // Similar to memmove: avoid copying shadow twice.
1058  // This is somewhat unfortunate as it may slowdown small constant memcpys.
1059  // FIXME: consider doing manual inline for small constant sizes and proper
1060  // alignment.
1061  void visitMemCpyInst(MemCpyInst &I) {
1062    IRBuilder<> IRB(&I);
1063    IRB.CreateCall3(
1064      MS.MemcpyFn,
1065      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
1066      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
1067      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
1068    I.eraseFromParent();
1069  }
1070
1071  // Same as memcpy.
1072  void visitMemSetInst(MemSetInst &I) {
1073    IRBuilder<> IRB(&I);
1074    IRB.CreateCall3(
1075      MS.MemsetFn,
1076      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
1077      IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
1078      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
1079    I.eraseFromParent();
1080  }
1081
1082  void visitVAStartInst(VAStartInst &I) {
1083    VAHelper->visitVAStartInst(I);
1084  }
1085
1086  void visitVACopyInst(VACopyInst &I) {
1087    VAHelper->visitVACopyInst(I);
1088  }
1089
1090  void visitCallSite(CallSite CS) {
1091    Instruction &I = *CS.getInstruction();
1092    assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
1093    if (CS.isCall()) {
1094      CallInst *Call = cast<CallInst>(&I);
1095
1096      // For inline asm, do the usual thing: check argument shadow and mark all
1097      // outputs as clean. Note that any side effects of the inline asm that are
1098      // not immediately visible in its constraints are not handled.
1099      if (Call->isInlineAsm()) {
1100        visitInstruction(I);
1101        return;
1102      }
1103
1104      // Allow only tail calls with the same types, otherwise
1105      // we may have a false positive: shadow for a non-void RetVal
1106      // will get propagated to a void RetVal.
1107      if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
1108        Call->setTailCall(false);
1109      if (isa<IntrinsicInst>(&I)) {
1110        // All intrinsics we care about are handled in corresponding visit*
1111        // methods. Add checks for the arguments, mark retval as clean.
1112        visitInstruction(I);
1113        return;
1114      }
1115    }
1116    IRBuilder<> IRB(&I);
1117    unsigned ArgOffset = 0;
1118    DEBUG(dbgs() << "  CallSite: " << I << "\n");
1119    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
1120         ArgIt != End; ++ArgIt) {
1121      Value *A = *ArgIt;
1122      unsigned i = ArgIt - CS.arg_begin();
1123      if (!A->getType()->isSized()) {
1124        DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
1125        continue;
1126      }
1127      unsigned Size = 0;
1128      Value *Store = 0;
1129      // Compute the Shadow for arg even if it is ByVal, because
1130      // in that case getShadow() will copy the actual arg shadow to
1131      // __msan_param_tls.
1132      Value *ArgShadow = getShadow(A);
1133      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
1134      DEBUG(dbgs() << "  Arg#" << i << ": " << *A <<
1135            " Shadow: " << *ArgShadow << "\n");
1136      if (CS.paramHasAttr(i + 1, Attributes::ByVal)) {
1137        assert(A->getType()->isPointerTy() &&
1138               "ByVal argument is not a pointer!");
1139        Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
1140        unsigned Alignment = CS.getParamAlignment(i + 1);
1141        Store = IRB.CreateMemCpy(ArgShadowBase,
1142                                 getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
1143                                 Size, Alignment);
1144      } else {
1145        Size = MS.TD->getTypeAllocSize(A->getType());
1146        Store = IRB.CreateStore(ArgShadow, ArgShadowBase);
1147      }
1148      if (ClTrackOrigins)
1149        IRB.CreateStore(getOrigin(A),
1150                        getOriginPtrForArgument(A, IRB, ArgOffset));
1151      assert(Size != 0 && Store != 0);
1152      DEBUG(dbgs() << "  Param:" << *Store << "\n");
1153      ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
1154    }
1155    DEBUG(dbgs() << "  done with call args\n");
1156
1157    FunctionType *FT =
1158      cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
1159    if (FT->isVarArg()) {
1160      VAHelper->visitCallSite(CS, IRB);
1161    }
1162
1163    // Now, get the shadow for the RetVal.
1164    if (!I.getType()->isSized()) return;
1165    IRBuilder<> IRBBefore(&I);
1166    // Untill we have full dynamic coverage, make sure the retval shadow is 0.
1167    Value *Base = getShadowPtrForRetval(&I, IRBBefore);
1168    IRBBefore.CreateStore(getCleanShadow(&I), Base);
1169    Instruction *NextInsn = 0;
1170    if (CS.isCall()) {
1171      NextInsn = I.getNextNode();
1172    } else {
1173      BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
1174      if (!NormalDest->getSinglePredecessor()) {
1175        // FIXME: this case is tricky, so we are just conservative here.
1176        // Perhaps we need to split the edge between this BB and NormalDest,
1177        // but a naive attempt to use SplitEdge leads to a crash.
1178        setShadow(&I, getCleanShadow(&I));
1179        setOrigin(&I, getCleanOrigin());
1180        return;
1181      }
1182      NextInsn = NormalDest->getFirstInsertionPt();
1183      assert(NextInsn &&
1184             "Could not find insertion point for retval shadow load");
1185    }
1186    IRBuilder<> IRBAfter(NextInsn);
1187    setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter),
1188                                      "_msret"));
1189    if (ClTrackOrigins)
1190      setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
1191  }
1192
1193  void visitReturnInst(ReturnInst &I) {
1194    IRBuilder<> IRB(&I);
1195    if (Value *RetVal = I.getReturnValue()) {
1196      // Set the shadow for the RetVal.
1197      Value *Shadow = getShadow(RetVal);
1198      Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
1199      DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
1200      IRB.CreateStore(Shadow, ShadowPtr);
1201      if (ClTrackOrigins)
1202        IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
1203    }
1204  }
1205
1206  void visitPHINode(PHINode &I) {
1207    IRBuilder<> IRB(&I);
1208    ShadowPHINodes.push_back(&I);
1209    setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
1210                                "_msphi_s"));
1211    if (ClTrackOrigins)
1212      setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
1213                                  "_msphi_o"));
1214  }
1215
1216  void visitAllocaInst(AllocaInst &I) {
1217    setShadow(&I, getCleanShadow(&I));
1218    if (!ClPoisonStack) return;
1219    IRBuilder<> IRB(I.getNextNode());
1220    uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
1221    if (ClPoisonStackWithCall) {
1222      IRB.CreateCall2(MS.MsanPoisonStackFn,
1223                      IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
1224                      ConstantInt::get(MS.IntptrTy, Size));
1225    } else {
1226      Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
1227      IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern),
1228                       Size, I.getAlignment());
1229    }
1230
1231    if (ClTrackOrigins) {
1232      setOrigin(&I, getCleanOrigin());
1233      SmallString<2048> StackDescriptionStorage;
1234      raw_svector_ostream StackDescription(StackDescriptionStorage);
1235      // We create a string with a description of the stack allocation and
1236      // pass it into __msan_set_alloca_origin.
1237      // It will be printed by the run-time if stack-originated UMR is found.
1238      // The first 4 bytes of the string are set to '----' and will be replaced
1239      // by __msan_va_arg_overflow_size_tls at the first call.
1240      StackDescription << "----" << I.getName() << "@" << F.getName();
1241      Value *Descr =
1242          createPrivateNonConstGlobalForString(*F.getParent(),
1243                                               StackDescription.str());
1244      IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
1245                      IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
1246                      ConstantInt::get(MS.IntptrTy, Size),
1247                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
1248    }
1249  }
1250
1251  void visitSelectInst(SelectInst& I) {
1252    IRBuilder<> IRB(&I);
1253    setShadow(&I,  IRB.CreateSelect(I.getCondition(),
1254              getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
1255              "_msprop"));
1256    if (ClTrackOrigins)
1257      setOrigin(&I, IRB.CreateSelect(I.getCondition(),
1258                getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
1259  }
1260
1261  void visitLandingPadInst(LandingPadInst &I) {
1262    // Do nothing.
1263    // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1
1264    setShadow(&I, getCleanShadow(&I));
1265    setOrigin(&I, getCleanOrigin());
1266  }
1267
1268  void visitGetElementPtrInst(GetElementPtrInst &I) {
1269    handleShadowOr(I);
1270  }
1271
1272  void visitExtractValueInst(ExtractValueInst &I) {
1273    IRBuilder<> IRB(&I);
1274    Value *Agg = I.getAggregateOperand();
1275    DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
1276    Value *AggShadow = getShadow(Agg);
1277    DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
1278    Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
1279    DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
1280    setShadow(&I, ResShadow);
1281    setOrigin(&I, getCleanOrigin());
1282  }
1283
1284  void visitInsertValueInst(InsertValueInst &I) {
1285    IRBuilder<> IRB(&I);
1286    DEBUG(dbgs() << "InsertValue:  " << I << "\n");
1287    Value *AggShadow = getShadow(I.getAggregateOperand());
1288    Value *InsShadow = getShadow(I.getInsertedValueOperand());
1289    DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
1290    DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
1291    Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
1292    DEBUG(dbgs() << "   Res:        " << *Res << "\n");
1293    setShadow(&I, Res);
1294    setOrigin(&I, getCleanOrigin());
1295  }
1296
1297  void dumpInst(Instruction &I) {
1298    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1299      errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
1300    } else {
1301      errs() << "ZZZ " << I.getOpcodeName() << "\n";
1302    }
1303    errs() << "QQQ " << I << "\n";
1304  }
1305
1306  void visitResumeInst(ResumeInst &I) {
1307    DEBUG(dbgs() << "Resume: " << I << "\n");
1308    // Nothing to do here.
1309  }
1310
1311  void visitInstruction(Instruction &I) {
1312    // Everything else: stop propagating and check for poisoned shadow.
1313    if (ClDumpStrictInstructions)
1314      dumpInst(I);
1315    DEBUG(dbgs() << "DEFAULT: " << I << "\n");
1316    for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
1317      insertCheck(I.getOperand(i), &I);
1318    setShadow(&I, getCleanShadow(&I));
1319    setOrigin(&I, getCleanOrigin());
1320  }
1321};
1322
1323/// \brief AMD64-specific implementation of VarArgHelper.
1324struct VarArgAMD64Helper : public VarArgHelper {
1325  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
1326  // See a comment in visitCallSite for more details.
1327  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
1328  static const unsigned AMD64FpEndOffset = 176;
1329
1330  Function &F;
1331  MemorySanitizer &MS;
1332  MemorySanitizerVisitor &MSV;
1333  Value *VAArgTLSCopy;
1334  Value *VAArgOverflowSize;
1335
1336  SmallVector<CallInst*, 16> VAStartInstrumentationList;
1337
1338  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
1339                    MemorySanitizerVisitor &MSV)
1340    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
1341
1342  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
1343
1344  ArgKind classifyArgument(Value* arg) {
1345    // A very rough approximation of X86_64 argument classification rules.
1346    Type *T = arg->getType();
1347    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
1348      return AK_FloatingPoint;
1349    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
1350      return AK_GeneralPurpose;
1351    if (T->isPointerTy())
1352      return AK_GeneralPurpose;
1353    return AK_Memory;
1354  }
1355
1356  // For VarArg functions, store the argument shadow in an ABI-specific format
1357  // that corresponds to va_list layout.
1358  // We do this because Clang lowers va_arg in the frontend, and this pass
1359  // only sees the low level code that deals with va_list internals.
1360  // A much easier alternative (provided that Clang emits va_arg instructions)
1361  // would have been to associate each live instance of va_list with a copy of
1362  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
1363  // order.
1364  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {
1365    unsigned GpOffset = 0;
1366    unsigned FpOffset = AMD64GpEndOffset;
1367    unsigned OverflowOffset = AMD64FpEndOffset;
1368    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
1369         ArgIt != End; ++ArgIt) {
1370      Value *A = *ArgIt;
1371      ArgKind AK = classifyArgument(A);
1372      if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
1373        AK = AK_Memory;
1374      if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
1375        AK = AK_Memory;
1376      Value *Base;
1377      switch (AK) {
1378      case AK_GeneralPurpose:
1379        Base = getShadowPtrForVAArgument(A, IRB, GpOffset);
1380        GpOffset += 8;
1381        break;
1382      case AK_FloatingPoint:
1383        Base = getShadowPtrForVAArgument(A, IRB, FpOffset);
1384        FpOffset += 16;
1385        break;
1386      case AK_Memory:
1387        uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType());
1388        Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
1389        OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
1390      }
1391      IRB.CreateStore(MSV.getShadow(A), Base);
1392    }
1393    Constant *OverflowSize =
1394      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
1395    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
1396  }
1397
1398  /// \brief Compute the shadow address for a given va_arg.
1399  Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB,
1400                                   int ArgOffset) {
1401    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
1402    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1403    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0),
1404                              "_msarg");
1405  }
1406
1407  void visitVAStartInst(VAStartInst &I) {
1408    IRBuilder<> IRB(&I);
1409    VAStartInstrumentationList.push_back(&I);
1410    Value *VAListTag = I.getArgOperand(0);
1411    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
1412
1413    // Unpoison the whole __va_list_tag.
1414    // FIXME: magic ABI constants.
1415    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
1416                     /* size */24, /* alignment */16, false);
1417  }
1418
1419  void visitVACopyInst(VACopyInst &I) {
1420    IRBuilder<> IRB(&I);
1421    Value *VAListTag = I.getArgOperand(0);
1422    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
1423
1424    // Unpoison the whole __va_list_tag.
1425    // FIXME: magic ABI constants.
1426    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
1427                     /* size */ 24, /* alignment */ 16, false);
1428  }
1429
1430  void finalizeInstrumentation() {
1431    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
1432           "finalizeInstrumentation called twice");
1433    if (!VAStartInstrumentationList.empty()) {
1434      // If there is a va_start in this function, make a backup copy of
1435      // va_arg_tls somewhere in the function entry block.
1436      IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
1437      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
1438      Value *CopySize =
1439        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
1440                      VAArgOverflowSize);
1441      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
1442      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
1443    }
1444
1445    // Instrument va_start.
1446    // Copy va_list shadow from the backup copy of the TLS contents.
1447    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
1448      CallInst *OrigInst = VAStartInstrumentationList[i];
1449      IRBuilder<> IRB(OrigInst->getNextNode());
1450      Value *VAListTag = OrigInst->getArgOperand(0);
1451
1452      Value *RegSaveAreaPtrPtr =
1453        IRB.CreateIntToPtr(
1454          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
1455                        ConstantInt::get(MS.IntptrTy, 16)),
1456          Type::getInt64PtrTy(*MS.C));
1457      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
1458      Value *RegSaveAreaShadowPtr =
1459        MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
1460      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy,
1461                       AMD64FpEndOffset, 16);
1462
1463      Value *OverflowArgAreaPtrPtr =
1464        IRB.CreateIntToPtr(
1465          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
1466                        ConstantInt::get(MS.IntptrTy, 8)),
1467          Type::getInt64PtrTy(*MS.C));
1468      Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
1469      Value *OverflowArgAreaShadowPtr =
1470        MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
1471      Value *SrcPtr =
1472        getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
1473      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
1474    }
1475  }
1476};
1477
1478VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1479                                 MemorySanitizerVisitor &Visitor) {
1480  return new VarArgAMD64Helper(Func, Msan, Visitor);
1481}
1482
1483}  // namespace
1484
1485bool MemorySanitizer::runOnFunction(Function &F) {
1486  MemorySanitizerVisitor Visitor(F, *this);
1487
1488  // Clear out readonly/readnone attributes.
1489  AttrBuilder B;
1490  B.addAttribute(Attributes::ReadOnly)
1491    .addAttribute(Attributes::ReadNone);
1492  F.removeAttribute(AttrListPtr::FunctionIndex,
1493                    Attributes::get(F.getContext(), B));
1494
1495  return Visitor.runOnFunction();
1496}
1497