MemorySanitizer.cpp revision af4451b37e381c643144dc00614e63eef8db6082
1//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file is a part of MemorySanitizer, a detector of uninitialized
11/// reads.
12///
13/// Status: early prototype.
14///
15/// The algorithm of the tool is similar to Memcheck
16/// (http://goo.gl/QKbem). We associate a few shadow bits with every
17/// byte of the application memory, poison the shadow of the malloc-ed
18/// or alloca-ed memory, load the shadow bits on every memory read,
19/// propagate the shadow bits through some of the arithmetic
20/// instruction (including MOV), store the shadow bits on every memory
21/// write, report a bug on some other instructions (e.g. JMP) if the
22/// associated shadow is poisoned.
23///
24/// But there are differences too. The first and the major one:
25/// compiler instrumentation instead of binary instrumentation. This
26/// gives us much better register allocation, possible compiler
27/// optimizations and a fast start-up. But this brings the major issue
28/// as well: msan needs to see all program events, including system
29/// calls and reads/writes in system libraries, so we either need to
30/// compile *everything* with msan or use a binary translation
31/// component (e.g. DynamoRIO) to instrument pre-built libraries.
32/// Another difference from Memcheck is that we use 8 shadow bits per
33/// byte of application memory and use a direct shadow mapping. This
34/// greatly simplifies the instrumentation code and avoids races on
35/// shadow updates (Memcheck is single-threaded so races are not a
36/// concern there. Memcheck uses 2 shadow bits per byte with a slow
37/// path storage that uses 8 bits per byte).
38///
39/// The default value of shadow is 0, which means "clean" (not poisoned).
40///
41/// Every module initializer should call __msan_init to ensure that the
42/// shadow memory is ready. On error, __msan_warning is called. Since
43/// parameters and return values may be passed via registers, we have a
44/// specialized thread-local shadow for return values
45/// (__msan_retval_tls) and parameters (__msan_param_tls).
46//===----------------------------------------------------------------------===//
47
48#define DEBUG_TYPE "msan"
49
50#include "BlackList.h"
51#include "llvm/DataLayout.h"
52#include "llvm/Function.h"
53#include "llvm/InlineAsm.h"
54#include "llvm/IntrinsicInst.h"
55#include "llvm/IRBuilder.h"
56#include "llvm/LLVMContext.h"
57#include "llvm/MDBuilder.h"
58#include "llvm/Module.h"
59#include "llvm/Type.h"
60#include "llvm/ADT/DepthFirstIterator.h"
61#include "llvm/ADT/SmallString.h"
62#include "llvm/ADT/SmallVector.h"
63#include "llvm/ADT/ValueMap.h"
64#include "llvm/Transforms/Instrumentation.h"
65#include "llvm/Transforms/Utils/BasicBlockUtils.h"
66#include "llvm/Transforms/Utils/ModuleUtils.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/InstVisitor.h"
71#include "llvm/Support/raw_ostream.h"
72#include "llvm/Transforms/Instrumentation.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/ModuleUtils.h"
75
76using namespace llvm;
77
// Shadow memory is computed as ApplicationAddr & ~ShadowMask (see
// getShadowPtr); the mask is chosen per pointer width in doInitialization.
static const uint64_t kShadowMask32 = 1ULL << 31;
static const uint64_t kShadowMask64 = 1ULL << 46;
// Origin memory lives at a fixed offset from shadow memory (see getOriginPtr).
static const uint64_t kOriginOffset32 = 1ULL << 30;
static const uint64_t kOriginOffset64 = 1ULL << 45;
82
83// This is an important flag that makes the reports much more
84// informative at the cost of greater slowdown. Not fully implemented
85// yet.
86// FIXME: this should be a top-level clang flag, e.g.
87// -fmemory-sanitizer-full.
88static cl::opt<bool> ClTrackOrigins("msan-track-origins",
89       cl::desc("Track origins (allocation sites) of poisoned memory"),
90       cl::Hidden, cl::init(false));
91static cl::opt<bool> ClKeepGoing("msan-keep-going",
92       cl::desc("keep going after reporting a UMR"),
93       cl::Hidden, cl::init(false));
94static cl::opt<bool> ClPoisonStack("msan-poison-stack",
95       cl::desc("poison uninitialized stack variables"),
96       cl::Hidden, cl::init(true));
97static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
98       cl::desc("poison uninitialized stack variables with a call"),
99       cl::Hidden, cl::init(false));
100static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
101       cl::desc("poison uninitialized stack variables with the given patter"),
102       cl::Hidden, cl::init(0xff));
103
104static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
105       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
106       cl::Hidden, cl::init(true));
107
108// This flag controls whether we check the shadow of the address
109// operand of load or store. Such bugs are very rare, since load from
110// a garbage address typically results in SEGV, but still happen
111// (e.g. only lower bits of address are garbage, or the access happens
112// early at program startup where malloc-ed memory is more likely to
113// be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
114static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
115       cl::desc("report accesses through a pointer which has poisoned shadow"),
116       cl::Hidden, cl::init(true));
117
118static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
119       cl::desc("print out instructions with default strict semantics"),
120       cl::Hidden, cl::init(false));
121
122static cl::opt<std::string>  ClBlackListFile("msan-blacklist",
123       cl::desc("File containing the list of functions where MemorySanitizer "
124                "should not report bugs"), cl::Hidden);
125
126namespace {
127
/// \brief An instrumentation pass implementing detection of uninitialized
/// reads.
///
/// MemorySanitizer: instrument the code in module to find
/// uninitialized reads.
class MemorySanitizer : public FunctionPass {
public:
  MemorySanitizer() : FunctionPass(ID), TD(0) { }
  const char *getPassName() const { return "MemorySanitizer"; }
  /// \brief Per-function entry point; most of the work is delegated to
  /// MemorySanitizerVisitor.
  bool runOnFunction(Function &F);
  /// \brief Module-level setup: declares runtime functions and TLS globals.
  bool doInitialization(Module &M);
  static char ID;  // Pass identification, replacement for typeid.

private:
  /// \brief Target data layout; 0 if unavailable, in which case the pass
  /// does nothing (see doInitialization/runOnFunction).
  DataLayout *TD;
  LLVMContext *C;
  Type *IntptrTy;
  /// \brief Type of an origin id (i32, set up in doInitialization).
  Type *OriginTy;
  /// \brief Thread-local shadow storage for function parameters.
  GlobalVariable *ParamTLS;
  /// \brief Thread-local origin storage for function parameters.
  GlobalVariable *ParamOriginTLS;
  /// \brief Thread-local shadow storage for function return value.
  GlobalVariable *RetvalTLS;
  /// \brief Thread-local origin storage for function return value.
  GlobalVariable *RetvalOriginTLS;
  /// \brief Thread-local shadow storage for in-register va_arg function
  /// parameters (x86_64-specific).
  GlobalVariable *VAArgTLS;
  /// \brief Thread-local shadow storage for va_arg overflow area
  /// (x86_64-specific).
  GlobalVariable *VAArgOverflowSizeTLS;
  /// \brief Thread-local space used to pass origin value to the UMR reporting
  /// function.
  GlobalVariable *OriginTLS;

  /// \brief The run-time callback to print a warning
  /// (__msan_warning or __msan_warning_noreturn depending on -msan-keep-going).
  Value *WarningFn;
  /// \brief Run-time helper that copies origin info for a memory range.
  Value *MsanCopyOriginFn;
  /// \brief Run-time helper that generates a new origin value for a stack
  /// allocation.
  Value *MsanSetAllocaOriginFn;
  /// \brief Run-time helper that poisons stack on function entry.
  Value *MsanPoisonStackFn;
  /// \brief MSan runtime replacements for memmove, memcpy and memset.
  Value *MemmoveFn, *MemcpyFn, *MemsetFn;

  /// \brief Address mask used in application-to-shadow address calculation.
  /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask.
  uint64_t ShadowMask;
  /// \brief Offset of the origin shadow from the "normal" shadow.
  /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL
  uint64_t OriginOffset;
  /// \brief Branch weights for error reporting (marks the report path cold).
  MDNode *ColdCallWeights;
  /// \brief The blacklist: functions listed here get no checks.
  OwningPtr<BlackList> BL;
  /// \brief An empty volatile inline asm that prevents callback merge.
  InlineAsm *EmptyAsm;

  friend struct MemorySanitizerVisitor;
  friend struct VarArgAMD64Helper;
};
192}  // namespace
193
char MemorySanitizer::ID = 0;
INITIALIZE_PASS(MemorySanitizer, "msan",
                "MemorySanitizer: detects uninitialized reads.",
                false, false)

/// \brief Public entry point: create an instance of the MemorySanitizer pass.
FunctionPass *llvm::createMemorySanitizerPass() {
  return new MemorySanitizer();
}
202
203/// \brief Create a non-const global initialized with the given string.
204///
205/// Creates a writable global for Str so that we can pass it to the
206/// run-time lib. Runtime uses first 4 bytes of the string to store the
207/// frame ID, so the string needs to be mutable.
208static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
209                                                            StringRef Str) {
210  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
211  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
212                            GlobalValue::PrivateLinkage, StrConst, "");
213}
214
/// \brief Module-level initialization.
///
/// Obtains pointers to the required runtime library functions, and
/// inserts a call to __msan_init to the module's constructor list.
/// Returns false (no change) when DataLayout is unavailable.
bool MemorySanitizer::doInitialization(Module &M) {
  TD = getAnalysisIfAvailable<DataLayout>();
  if (!TD)
    return false;
  BL.reset(new BlackList(ClBlackListFile));
  C = &(M.getContext());
  // Pick the shadow mapping parameters that match the target pointer width.
  unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
  switch (PtrSize) {
    case 64:
      ShadowMask = kShadowMask64;
      OriginOffset = kOriginOffset64;
      break;
    case 32:
      ShadowMask = kShadowMask32;
      OriginOffset = kOriginOffset32;
      break;
    default:
      report_fatal_error("unsupported pointer size");
      break;
  }

  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(TD);
  OriginTy = IRB.getInt32Ty();

  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);

  // Insert a call to __msan_init into the module's CTORs, and publish the
  // origin-tracking mode to the runtime via the __msan_track_origins global.
  appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
                      "__msan_init", IRB.getVoidTy(), NULL)), 0);

  new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::LinkOnceODRLinkage,
                     IRB.getInt32(ClTrackOrigins), "__msan_track_origins");

  // Create the callback.
  // FIXME: this function should have "Cold" calling conv,
  // which is not yet implemented.
  StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
                                        : "__msan_warning_noreturn";
  WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);

  // Declare the rest of the runtime interface.
  MsanCopyOriginFn = M.getOrInsertFunction(
    "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
    IRB.getInt8PtrTy(), IntptrTy, NULL);
  MsanSetAllocaOriginFn = M.getOrInsertFunction(
    "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
    IRB.getInt8PtrTy(), NULL);
  MsanPoisonStackFn = M.getOrInsertFunction(
    "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
  MemmoveFn = M.getOrInsertFunction(
    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    IntptrTy, NULL);
  MemcpyFn = M.getOrInsertFunction(
    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    IntptrTy, NULL);
  MemsetFn = M.getOrInsertFunction(
    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
    IntptrTy, NULL);

  // Create globals. All are thread-local (GeneralDynamicTLSModel) with
  // external linkage: the actual storage is defined by the runtime library.
  // NOTE(review): the array sizes (8 x i64 for retval, 1000 x i64 for
  // params/va_args) presumably mirror the runtime's buffer sizes — confirm
  // against the msan runtime if changing.
  RetvalTLS = new GlobalVariable(
    M, ArrayType::get(IRB.getInt64Ty(), 8), false,
    GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
    GlobalVariable::GeneralDynamicTLSModel);
  RetvalOriginTLS = new GlobalVariable(
    M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
    "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);

  ParamTLS = new GlobalVariable(
    M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
    GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
    GlobalVariable::GeneralDynamicTLSModel);
  ParamOriginTLS = new GlobalVariable(
    M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
    0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);

  VAArgTLS = new GlobalVariable(
    M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
    GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
    GlobalVariable::GeneralDynamicTLSModel);
  VAArgOverflowSizeTLS = new GlobalVariable(
    M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
    "__msan_va_arg_overflow_size_tls", 0,
    GlobalVariable::GeneralDynamicTLSModel);
  OriginTLS = new GlobalVariable(
    M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
    "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);

  // We insert an empty inline asm after __msan_report* to avoid callback merge.
  EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                            StringRef(""), StringRef(""),
                            /*hasSideEffects=*/true);
  return true;
}
313
314namespace {
315
/// \brief A helper class that handles instrumentation of VarArg
/// functions on a particular platform.
///
/// Implementations are expected to insert the instrumentation
/// necessary to propagate argument shadow through VarArg function
/// calls. Visit* methods are called during an InstVisitor pass over
/// the function, and should avoid creating new basic blocks. A new
/// instance of this class is created for each instrumented function.
struct VarArgHelper {
  /// \brief Visit a CallSite.
  virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;

  /// \brief Visit a va_start call.
  virtual void visitVAStartInst(VAStartInst &I) = 0;

  /// \brief Visit a va_copy call.
  virtual void visitVACopyInst(VACopyInst &I) = 0;

  /// \brief Finalize function instrumentation.
  ///
  /// This method is called after visiting all interesting (see above)
  /// instructions in a function.
  virtual void finalizeInstrumentation() = 0;

  // Virtual destructor: instances are owned and deleted through the base
  // pointer (MemorySanitizerVisitor::VAHelper).
  virtual ~VarArgHelper() {}
};
342
struct MemorySanitizerVisitor;

// Factory for the platform-specific VarArgHelper implementation
// (defined later in this file).
VarArgHelper*
CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
                   MemorySanitizerVisitor &Visitor);
348
349/// This class does all the work for a given function. Store and Load
350/// instructions store and load corresponding shadow and origin
351/// values. Most instructions propagate shadow from arguments to their
352/// return values. Certain instructions (most importantly, BranchInst)
353/// test their argument shadow and print reports (with a runtime call) if it's
354/// non-zero.
355struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
  Function &F;
  MemorySanitizer &MS;
  SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
  // Maps an application value to its shadow (resp. origin) value.
  ValueMap<Value*, Value*> ShadowMap, OriginMap;
  // False for blacklisted functions: shadow is still propagated, but
  // insertCheck() becomes a no-op.
  bool InsertChecks;
  OwningPtr<VarArgHelper> VAHelper;

  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
  // See a comment in visitCallSite for more details.
  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
  static const unsigned AMD64FpEndOffset = 176;

  // A (shadow, origin, insert point) triple recorded by insertCheck() and
  // turned into an actual runtime check by materializeChecks().
  struct ShadowOriginAndInsertPoint {
    Instruction *Shadow;
    Instruction *Origin;
    // The instruction in front of which the check will be emitted.
    Instruction *OrigIns;
    ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
      : Shadow(S), Origin(O), OrigIns(I) { }
    ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
  };
  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
377
  // Blacklisted functions still get shadow propagation, just no checks.
  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
    : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
    InsertChecks = !MS.BL->isIn(F);
    DEBUG(if (!InsertChecks)
            dbgs() << "MemorySanitizer is not inserting checks into '"
                   << F.getName() << "'\n");
  }
385
  /// \brief Emit the actual runtime checks recorded in InstrumentationList.
  ///
  /// For each (shadow, origin, insert point) triple: flatten the shadow,
  /// compare it with zero, and branch to a cold block that reports the UMR
  /// through MS.WarningFn if it is non-zero.
  void materializeChecks() {
    for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
      Instruction *Shadow = InstrumentationList[i].Shadow;
      Instruction *OrigIns = InstrumentationList[i].OrigIns;
      IRBuilder<> IRB(OrigIns);
      DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
      // Vector shadow is compared as a single integer of the same bit width.
      Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
      DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
      Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
                                    getCleanShadow(ConvertedShadow), "_mscmp");
      // The report block ends in unreachable unless -msan-keep-going is set.
      Instruction *CheckTerm =
        SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
                                  /* Unreachable */ !ClKeepGoing,
                                  MS.ColdCallWeights);

      IRB.SetInsertPoint(CheckTerm);
      if (ClTrackOrigins) {
        // Pass the origin to the runtime through __msan_origin_tls.
        Instruction *Origin = InstrumentationList[i].Origin;
        IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
                        MS.OriginTLS);
      }
      CallInst *Call = IRB.CreateCall(MS.WarningFn);
      Call->setDebugLoc(OrigIns->getDebugLoc());
      // The empty asm keeps the backend from merging identical callbacks.
      IRB.CreateCall(MS.EmptyAsm);
      DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
    }
    DEBUG(dbgs() << "DONE:\n" << F);
  }
414
  /// \brief Add MemorySanitizer instrumentation to a function.
  ///
  /// Returns false (no change) only when DataLayout is unavailable.
  bool runOnFunction() {
    if (!MS.TD) return false;
    // Iterate all BBs in depth-first order and create shadow instructions
    // for all instructions (where applicable).
    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
    for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
         DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
      BasicBlock *BB = *DI;
      visit(*BB);
    }

    // Finalize PHI nodes: now that every visited instruction has a shadow,
    // fill in the incoming values of the shadow (and origin) PHIs.
    for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
      PHINode *PN = ShadowPHINodes[i];
      PHINode *PNS = cast<PHINode>(getShadow(PN));
      PHINode *PNO = ClTrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
      size_t NumValues = PN->getNumIncomingValues();
      for (size_t v = 0; v < NumValues; v++) {
        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
        if (PNO)
          PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
      }
    }

    VAHelper->finalizeInstrumentation();

    // Materialize delayed checks last: this splits basic blocks, which must
    // not happen while the traversal above is still running.
    materializeChecks();

    return true;
  }
446
  /// \brief Compute the shadow type that corresponds to a given Value.
  Type *getShadowTy(Value *V) {
    return getShadowTy(V->getType());
  }

  /// \brief Compute the shadow type that corresponds to a given Type.
  ///
  /// Returns 0 for unsized types.
  Type *getShadowTy(Type *OrigTy) {
    if (!OrigTy->isSized()) {
      return 0;
    }
    // For integer type, shadow is the same as the original type.
    // This may return weird-sized types like i1.
    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
      return IT;
    // Vectors get a vector of integers with matching element width.
    if (VectorType *VT = dyn_cast<VectorType>(OrigTy))
      return VectorType::getInteger(VT);
    // Structs are shadowed element-wise by a parallel struct type.
    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
      SmallVector<Type*, 4> Elements;
      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
        Elements.push_back(getShadowTy(ST->getElementType(i)));
      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
      DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
      return Res;
    }
    // Everything else (pointers, floats, ...) gets an integer shadow of the
    // same store size.
    uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy);
    return IntegerType::get(*MS.C, TypeSize);
  }
474
475  /// \brief Flatten a vector type.
476  Type *getShadowTyNoVec(Type *ty) {
477    if (VectorType *vt = dyn_cast<VectorType>(ty))
478      return IntegerType::get(*MS.C, vt->getBitWidth());
479    return ty;
480  }
481
482  /// \brief Convert a shadow value to it's flattened variant.
483  Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
484    Type *Ty = V->getType();
485    Type *NoVecTy = getShadowTyNoVec(Ty);
486    if (Ty == NoVecTy) return V;
487    return IRB.CreateBitCast(V, NoVecTy);
488  }
489
  /// \brief Compute the shadow address that corresponds to a given application
  /// address.
  ///
  /// Shadow = Addr & ~ShadowMask.
  Value *getShadowPtr(Value *Addr, Type *ShadowTy,
                      IRBuilder<> &IRB) {
    Value *ShadowLong =
      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
    return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
  }

  /// \brief Compute the origin address that corresponds to a given application
  /// address.
  ///
  /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL
  Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) {
    Value *ShadowLong =
      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
    Value *Add =
      IRB.CreateAdd(ShadowLong,
                    ConstantInt::get(MS.IntptrTy, MS.OriginOffset));
    // Align down to 4 bytes: origins are stored as 32-bit values.
    Value *SecondAnd =
      IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL));
    return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0));
  }
517
518  /// \brief Compute the shadow address for a given function argument.
519  ///
520  /// Shadow = ParamTLS+ArgOffset.
521  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
522                                 int ArgOffset) {
523    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
524    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
525    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
526                              "_msarg");
527  }
528
529  /// \brief Compute the origin address for a given function argument.
530  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
531                                 int ArgOffset) {
532    if (!ClTrackOrigins) return 0;
533    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
534    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
535    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
536                              "_msarg_o");
537  }
538
  /// \brief Compute the shadow address for a retval.
  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
    Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy);
    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
                              "_msret");
  }

  /// \brief Compute the origin address for a retval.
  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
    // We keep a single origin for the entire retval. Might be too optimistic.
    return MS.RetvalOriginTLS;
  }

  /// \brief Set SV to be the shadow value for V.
  ///
  /// Each value may be assigned a shadow exactly once.
  void setShadow(Value *V, Value *SV) {
    assert(!ShadowMap.count(V) && "Values may only have one shadow");
    ShadowMap[V] = SV;
  }

  /// \brief Set Origin to be the origin value for V.
  ///
  /// No-op when origin tracking is disabled.
  void setOrigin(Value *V, Value *Origin) {
    if (!ClTrackOrigins) return;
    assert(!OriginMap.count(V) && "Values may only have one origin");
    DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
    OriginMap[V] = Origin;
  }
565
566  /// \brief Create a clean shadow value for a given value.
567  ///
568  /// Clean shadow (all zeroes) means all bits of the value are defined
569  /// (initialized).
570  Value *getCleanShadow(Value *V) {
571    Type *ShadowTy = getShadowTy(V);
572    if (!ShadowTy)
573      return 0;
574    return Constant::getNullValue(ShadowTy);
575  }
576
  /// \brief Create a dirty (all-ones) shadow of a given shadow type.
  Constant *getPoisonedShadow(Type *ShadowTy) {
    assert(ShadowTy);
    if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
      return Constant::getAllOnesValue(ShadowTy);
    // Only struct shadow types remain (see getShadowTy); poison each
    // element recursively.
    StructType *ST = cast<StructType>(ShadowTy);
    SmallVector<Constant *, 4> Vals;
    for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
      Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
    return ConstantStruct::get(ST, Vals);
  }

  /// \brief Create a clean (zero) origin.
  Value *getCleanOrigin() {
    return Constant::getNullValue(MS.OriginTy);
  }
593
  /// \brief Get the shadow value for a given Value.
  ///
  /// This function either returns the value set earlier with setShadow,
  /// or extracts it from ParamTLS (for function arguments).
  Value *getShadow(Value *V) {
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      // For instructions the shadow is already stored in the map.
      Value *Shadow = ShadowMap[V];
      if (!Shadow) {
        DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
        assert(Shadow && "No shadow for a value");
      }
      return Shadow;
    }
    if (UndefValue *U = dyn_cast<UndefValue>(V)) {
      // Undef values are fully poisoned.
      Value *AllOnes = getPoisonedShadow(getShadowTy(V));
      DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
      return AllOnes;
    }
    if (Argument *A = dyn_cast<Argument>(V)) {
      // For arguments we compute the shadow on demand and store it in the map.
      Value **ShadowPtr = &ShadowMap[V];
      if (*ShadowPtr)
        return *ShadowPtr;
      Function *F = A->getParent();
      IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
      // Walk the argument list to find A's byte offset in the ParamTLS
      // buffer, then load (or, for byval, copy) its shadow from there.
      unsigned ArgOffset = 0;
      for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
           AI != AE; ++AI) {
        if (!AI->getType()->isSized()) {
          DEBUG(dbgs() << "Arg is not sized\n");
          continue;
        }
        unsigned Size = AI->hasByValAttr()
          ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType())
          : MS.TD->getTypeAllocSize(AI->getType());
        if (A == AI) {
          Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
          if (AI->hasByValAttr()) {
            // ByVal pointer itself has clean shadow. We copy the actual
            // argument shadow to the underlying memory.
            Value *Cpy = EntryIRB.CreateMemCpy(
              getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
              Base, Size, AI->getParamAlignment());
            DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
            *ShadowPtr = getCleanShadow(V);
          } else {
            *ShadowPtr = EntryIRB.CreateLoad(Base);
          }
          DEBUG(dbgs() << "  ARG:    "  << *AI << " ==> " <<
                **ShadowPtr << "\n");
          if (ClTrackOrigins) {
            Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset);
            setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
          }
        }
        // Each argument occupies an 8-byte-aligned slot in ParamTLS.
        ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
      }
      assert(*ShadowPtr && "Could not find shadow for an argument");
      return *ShadowPtr;
    }
    // For everything else the shadow is zero.
    return getCleanShadow(V);
  }
658
  /// \brief Get the shadow for the i-th operand of the instruction I.
  Value *getShadow(Instruction *I, int i) {
    return getShadow(I->getOperand(i));
  }
663
664  /// \brief Get the origin for a value.
665  Value *getOrigin(Value *V) {
666    if (!ClTrackOrigins) return 0;
667    if (isa<Instruction>(V) || isa<Argument>(V)) {
668      Value *Origin = OriginMap[V];
669      if (!Origin) {
670        DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n");
671        Origin = getCleanOrigin();
672      }
673      return Origin;
674    }
675    return getCleanOrigin();
676  }
677
  /// \brief Get the origin for the i-th operand of the instruction I.
  Value *getOrigin(Instruction *I, int i) {
    return getOrigin(I->getOperand(i));
  }

  /// \brief Remember the place where a shadow check should be inserted.
  ///
  /// This location will be later instrumented with a check that will print a
  /// UMR warning in runtime if the value is not fully defined. The actual
  /// IR is emitted by materializeChecks().
  void insertCheck(Value *Val, Instruction *OrigIns) {
    assert(Val);
    if (!InsertChecks) return;
    // NOTE: only Instruction shadows are checked; constant shadows (clean,
    // or fully-poisoned from undef values) are skipped here.
    Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
    if (!Shadow) return;
    Type *ShadowTy = Shadow->getType();
    assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
           "Can only insert checks for integer and vector shadow types");
    Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
    InstrumentationList.push_back(
      ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
  }
699
  //------------------- Visitors.

  /// \brief Instrument LoadInst
  ///
  /// Loads the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the load address is fully defined.
  void visitLoadInst(LoadInst &I) {
    Type *LoadTy = I.getType();
    assert(LoadTy->isSized() && "Load type must have size");
    IRBuilder<> IRB(&I);
    Type *ShadowTy = getShadowTy(&I);
    Value *Addr = I.getPointerOperand();
    // The shadow of the loaded value comes from the shadow memory that
    // corresponds to Addr.
    Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
    setShadow(&I, IRB.CreateLoad(ShadowPtr, "_msld"));

    if (ClCheckAccessAddress)
      insertCheck(I.getPointerOperand(), &I);

    if (ClTrackOrigins)
      setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
  }
721
  /// \brief Instrument StoreInst
  ///
  /// Stores the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the store address is fully defined.
  /// Volatile stores check that the value being stored is fully defined.
  void visitStoreInst(StoreInst &I) {
    IRBuilder<> IRB(&I);
    Value *Val = I.getValueOperand();
    Value *Addr = I.getPointerOperand();
    Value *Shadow = getShadow(Val);
    Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);

    // Mirror the application store in shadow memory.
    StoreInst *NewSI = IRB.CreateStore(Shadow, ShadowPtr);
    DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
    // If the store is volatile, add a check.
    if (I.isVolatile())
      insertCheck(Val, &I);
    if (ClCheckAccessAddress)
      insertCheck(Addr, &I);

    if (ClTrackOrigins)
      IRB.CreateStore(getOrigin(Val), getOriginPtr(Addr, IRB));
  }
745
746  // Casts.
  /// \brief Sign extension: sign-extend the shadow the same way, so a
  /// poisoned sign bit poisons all the new high bits. Origin passes through.
  void visitSExtInst(SExtInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }
752
  /// \brief Zero extension: the new high bits are constant zeros, hence
  /// fully initialized -- zero-extend the shadow (new bits clean).
  void visitZExtInst(ZExtInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }
758
  /// \brief Truncation drops high bits of the value and of its shadow alike.
  void visitTruncInst(TruncInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }
764
  /// \brief Bitcast preserves the bit pattern, so the shadow is simply
  /// bitcast to the result's shadow type.
  void visitBitCastInst(BitCastInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
    setOrigin(&I, getOrigin(&I, 0));
  }
770
  /// \brief ptrtoint: carry the shadow over with an unsigned int cast
  /// (zext/trunc as needed) to the result's shadow type.
  void visitPtrToIntInst(PtrToIntInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
             "_msprop_ptrtoint"));
    setOrigin(&I, getOrigin(&I, 0));
  }
777
  /// \brief inttoptr: carry the shadow over with an unsigned int cast
  /// (zext/trunc as needed) to the result's shadow type.
  void visitIntToPtrInst(IntToPtrInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
             "_msprop_inttoptr"));
    setOrigin(&I, getOrigin(&I, 0));
  }
784
  // FP<->int and FP<->FP casts cannot be modeled bit-for-bit; conservatively
  // OR the operand shadow into the result (see handleShadowOr).
  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
791
  /// \brief Propagate shadow for bitwise AND.
  ///
  /// This code is exact, i.e. if, for example, a bit in the left argument
  /// is defined and 0, then neither the value nor the definedness of the
  /// corresponding bit in the other argument affects the resulting shadow.
  void visitAnd(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    //  "And" of 0 and a poisoned value results in unpoisoned value.
    //  1&1 => 1;     0&1 => 0;     p&1 => p;
    //  1&0 => 0;     0&0 => 0;     p&0 => 0;
    //  1&p => p;     0&p => 0;     p&p => p;
    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    Value *V1 = I.getOperand(0);
    Value *V2 = I.getOperand(1);
    // Value and shadow types can disagree (presumably for i1 operands --
    // TODO confirm); widen the values to the shadow type so the bitwise
    // formula below type-checks.
    if (V1->getType() != S1->getType()) {
      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
    }
    Value *S1S2 = IRB.CreateAnd(S1, S2);
    Value *V1S2 = IRB.CreateAnd(V1, S2);
    Value *S1V2 = IRB.CreateAnd(S1, V2);
    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
    setOriginForNaryOp(I);
  }
818
  /// \brief Propagate shadow for bitwise OR (exact, dual of visitAnd):
  /// a defined 1 bit in either operand makes the result bit defined.
  void visitOr(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    //  "Or" of 1 and a poisoned value results in unpoisoned value.
    //  1|1 => 1;     0|1 => 1;     p|1 => 1;
    //  1|0 => 1;     0|0 => 0;     p|0 => p;
    //  1|p => 1;     0|p => p;     p|p => p;
    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    Value *V1 = IRB.CreateNot(I.getOperand(0));
    Value *V2 = IRB.CreateNot(I.getOperand(1));
    // Widen the (negated) values to the shadow type when they disagree.
    if (V1->getType() != S1->getType()) {
      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
    }
    Value *S1S2 = IRB.CreateAnd(S1, S2);
    Value *V1S2 = IRB.CreateAnd(V1, S2);
    Value *S1V2 = IRB.CreateAnd(S1, V2);
    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
    setOriginForNaryOp(I);
  }
840
  /// \brief Propagate origin for an instruction.
  ///
  /// This is a general case of origin propagation. For an Nary operation,
  /// the origin is set to the origin of an argument that is not entirely
  /// initialized. It does not matter which one is picked if all arguments
  /// are initialized.
  void setOriginForNaryOp(Instruction &I) {
    if (!ClTrackOrigins) return;
    IRBuilder<> IRB(&I);
    Value *Origin = getOrigin(&I, 0);
    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) {
      // Keep the accumulated origin while operand Op-1's shadow is non-zero
      // (i.e. it is poisoned); otherwise advance to operand Op's origin.
      Value *S = convertToShadowTyNoVec(getShadow(&I, Op - 1), IRB);
      Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)),
                                Origin, getOrigin(&I, Op));
    }
    setOrigin(&I, Origin);
  }
857
858  /// \brief Propagate shadow for a binary operation.
859  ///
860  /// Shadow = Shadow0 | Shadow1, all 3 must have the same type.
861  /// Bitwise OR is selected as an operation that will never lose even a bit of
862  /// poison.
863  void handleShadowOrBinary(Instruction &I) {
864    IRBuilder<> IRB(&I);
865    Value *Shadow0 = getShadow(&I, 0);
866    Value *Shadow1 = getShadow(&I, 1);
867    setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop"));
868    setOriginForNaryOp(I);
869  }
870
  /// \brief Propagate shadow for arbitrary operation.
  ///
  /// This is a general case of shadow propagation, used in all cases where we
  /// don't know and/or care about what the operation actually does.
  /// It converts all input shadow values to a common type (extending or
  /// truncating as necessary), and bitwise OR's them.
  ///
  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
  /// fully initialized), and less prone to false positives.
  // FIXME: is the casting actually correct?
  // FIXME: merge this with handleShadowOrBinary.
  void handleShadowOr(Instruction &I) {
    IRBuilder<> IRB(&I);
    Value *Shadow = getShadow(&I, 0);
    // Fold every operand shadow into the accumulator, unsigned-casting each
    // to the accumulator's type (zext/trunc; see FIXME above).
    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op)
      Shadow = IRB.CreateOr(
        Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false),
        "_msprop");
    // Finally cast the accumulated shadow to the result's shadow type.
    Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false);
    setShadow(&I, Shadow);
    setOriginForNaryOp(I);
  }
893
  // Arithmetic and xor: any poisoned input bit may affect any output bit,
  // so conservatively OR the operand shadows (see handleShadowOrBinary).
  void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
  void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); }
  void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); }
  void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
  void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); }
  void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); }
  void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); }
901
902  void handleDiv(Instruction &I) {
903    IRBuilder<> IRB(&I);
904    // Strict on the second argument.
905    insertCheck(I.getOperand(1), &I);
906    setShadow(&I, getShadow(&I, 0));
907    setOrigin(&I, getOrigin(&I, 0));
908  }
909
  // Division and remainder, integer and floating-point: see handleDiv.
  void visitUDiv(BinaryOperator &I) { handleDiv(I); }
  void visitSDiv(BinaryOperator &I) { handleDiv(I); }
  void visitFDiv(BinaryOperator &I) { handleDiv(I); }
  void visitURem(BinaryOperator &I) { handleDiv(I); }
  void visitSRem(BinaryOperator &I) { handleDiv(I); }
  void visitFRem(BinaryOperator &I) { handleDiv(I); }
916
  /// \brief Instrument == and != comparisons.
  ///
  /// Sometimes the comparison result is known even if some of the bits of the
  /// arguments are not.
  void handleEqualityComparison(ICmpInst &I) {
    IRBuilder<> IRB(&I);
    Value *A = I.getOperand(0);
    Value *B = I.getOperand(1);
    Value *Sa = getShadow(A);
    Value *Sb = getShadow(B);
    // Pointers are compared as integers of pointer width.
    if (A->getType()->isPointerTy())
      A = IRB.CreatePointerCast(A, MS.IntptrTy);
    if (B->getType()->isPointerTy())
      B = IRB.CreatePointerCast(B, MS.IntptrTy);
    // A == B  <==>  (C = A^B) == 0
    // A != B  <==>  (C = A^B) != 0
    // Sc = Sa | Sb
    Value *C = IRB.CreateXor(A, B);
    Value *Sc = IRB.CreateOr(Sa, Sb);
    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
    // Result is defined if one of the following is true
    // * there is a defined 1 bit in C
    // * C is fully defined
    // Si = !(C & ~Sc) && Sc
    // I.e. the result is poisoned iff some input bit is poisoned (Sc != 0)
    // AND no defined bit of C is 1 (a defined differing bit would decide
    // the comparison regardless of the poisoned bits).
    Value *Zero = Constant::getNullValue(Sc->getType());
    Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
    Value *Si =
      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
                    IRB.CreateICmpEQ(
                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
    Si->setName("_msprop_icmp");
    setShadow(&I, Si);
    setOriginForNaryOp(I);
  }
951
952  void visitICmpInst(ICmpInst &I) {
953    if (ClHandleICmp && I.isEquality())
954      handleEqualityComparison(I);
955    else
956      handleShadowOr(I);
957  }
958
  /// \brief FP comparisons are not modeled bitwise; conservatively OR the
  /// operand shadows.
  void visitFCmpInst(FCmpInst &I) {
    handleShadowOr(I);
  }
962
  /// \brief Instrument shl/lshr/ashr.
  ///
  /// A poisoned shift amount poisons the entire result; otherwise the first
  /// operand's shadow is shifted by the same (concrete) amount.
  void handleShift(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    // If any of the S2 bits are poisoned, the whole thing is poisoned.
    // Otherwise perform the same shift on S1.
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    // S2Conv: all-ones if S2 is poisoned at all, all-zeros otherwise.
    Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
                                   S2->getType());
    Value *V2 = I.getOperand(1);
    Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
    setOriginForNaryOp(I);
  }
976
  // All three shift flavors share the handleShift logic above.
  void visitShl(BinaryOperator &I) { handleShift(I); }
  void visitAShr(BinaryOperator &I) { handleShift(I); }
  void visitLShr(BinaryOperator &I) { handleShift(I); }
980
  /// \brief Instrument llvm.memmove
  ///
  /// At this point we don't know if llvm.memmove will be inlined or not.
  /// If we don't instrument it and it gets inlined,
  /// our interceptor will not kick in and we will lose the memmove.
  /// If we instrument the call here, but it does not get inlined,
  /// we will memmove the shadow twice: which is bad in case
  /// of overlapping regions. So, we simply lower the intrinsic to a call.
  ///
  /// Similar situation exists for memcpy and memset.
  void visitMemMoveInst(MemMoveInst &I) {
    IRBuilder<> IRB(&I);
    // Replace the intrinsic with a call to the interceptor
    // (dest, src, size), which moves both data and shadow.
    IRB.CreateCall3(
      MS.MemmoveFn,
      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
    I.eraseFromParent();
  }
1000
  // Similar to memmove: avoid copying shadow twice.
  // This is somewhat unfortunate as it may slowdown small constant memcpys.
  // FIXME: consider doing manual inline for small constant sizes and proper
  // alignment.
  void visitMemCpyInst(MemCpyInst &I) {
    IRBuilder<> IRB(&I);
    // Lower to a call to the interceptor: (dest, src, size).
    IRB.CreateCall3(
      MS.MemcpyFn,
      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
    I.eraseFromParent();
  }
1014
  // Same as memcpy.
  void visitMemSetInst(MemSetInst &I) {
    IRBuilder<> IRB(&I);
    // Lower to a call to the interceptor: (dest, fill value as i32, size).
    IRB.CreateCall3(
      MS.MemsetFn,
      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
      IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
    I.eraseFromParent();
  }
1025
  /// \brief va_start handling is ABI-specific; delegate to the helper.
  void visitVAStartInst(VAStartInst &I) {
    VAHelper->visitVAStartInst(I);
  }
1029
  /// \brief va_copy handling is ABI-specific; delegate to the helper.
  void visitVACopyInst(VACopyInst &I) {
    VAHelper->visitVACopyInst(I);
  }
1033
1034  void visitCallSite(CallSite CS) {
1035    Instruction &I = *CS.getInstruction();
1036    assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
1037    if (CS.isCall()) {
1038      // Allow only tail calls with the same types, otherwise
1039      // we may have a false positive: shadow for a non-void RetVal
1040      // will get propagated to a void RetVal.
1041      CallInst *Call = cast<CallInst>(&I);
1042      if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
1043        Call->setTailCall(false);
1044      if (isa<IntrinsicInst>(&I)) {
1045        // All intrinsics we care about are handled in corresponding visit*
1046        // methods. Add checks for the arguments, mark retval as clean.
1047        visitInstruction(I);
1048        return;
1049      }
1050    }
1051    IRBuilder<> IRB(&I);
1052    unsigned ArgOffset = 0;
1053    DEBUG(dbgs() << "  CallSite: " << I << "\n");
1054    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
1055         ArgIt != End; ++ArgIt) {
1056      Value *A = *ArgIt;
1057      unsigned i = ArgIt - CS.arg_begin();
1058      if (!A->getType()->isSized()) {
1059        DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
1060        continue;
1061      }
1062      unsigned Size = 0;
1063      Value *Store = 0;
1064      // Compute the Shadow for arg even if it is ByVal, because
1065      // in that case getShadow() will copy the actual arg shadow to
1066      // __msan_param_tls.
1067      Value *ArgShadow = getShadow(A);
1068      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
1069      DEBUG(dbgs() << "  Arg#" << i << ": " << *A <<
1070            " Shadow: " << *ArgShadow << "\n");
1071      if (CS.paramHasAttr(i + 1, Attributes::ByVal)) {
1072        assert(A->getType()->isPointerTy() &&
1073               "ByVal argument is not a pointer!");
1074        Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
1075        unsigned Alignment = CS.getParamAlignment(i + 1);
1076        Store = IRB.CreateMemCpy(ArgShadowBase,
1077                                 getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
1078                                 Size, Alignment);
1079      } else {
1080        Size = MS.TD->getTypeAllocSize(A->getType());
1081        Store = IRB.CreateStore(ArgShadow, ArgShadowBase);
1082      }
1083      if (ClTrackOrigins)
1084        IRB.CreateStore(getOrigin(A),
1085                        getOriginPtrForArgument(A, IRB, ArgOffset));
1086      assert(Size != 0 && Store != 0);
1087      DEBUG(dbgs() << "  Param:" << *Store << "\n");
1088      ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
1089    }
1090    DEBUG(dbgs() << "  done with call args\n");
1091
1092    FunctionType *FT =
1093      cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
1094    if (FT->isVarArg()) {
1095      VAHelper->visitCallSite(CS, IRB);
1096    }
1097
1098    // Now, get the shadow for the RetVal.
1099    if (!I.getType()->isSized()) return;
1100    IRBuilder<> IRBBefore(&I);
1101    // Untill we have full dynamic coverage, make sure the retval shadow is 0.
1102    Value *Base = getShadowPtrForRetval(&I, IRBBefore);
1103    IRBBefore.CreateStore(getCleanShadow(&I), Base);
1104    Instruction *NextInsn = 0;
1105    if (CS.isCall()) {
1106      NextInsn = I.getNextNode();
1107    } else {
1108      BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
1109      if (!NormalDest->getSinglePredecessor()) {
1110        // FIXME: this case is tricky, so we are just conservative here.
1111        // Perhaps we need to split the edge between this BB and NormalDest,
1112        // but a naive attempt to use SplitEdge leads to a crash.
1113        setShadow(&I, getCleanShadow(&I));
1114        setOrigin(&I, getCleanOrigin());
1115        return;
1116      }
1117      NextInsn = NormalDest->getFirstInsertionPt();
1118      assert(NextInsn &&
1119             "Could not find insertion point for retval shadow load");
1120    }
1121    IRBuilder<> IRBAfter(NextInsn);
1122    setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter),
1123                                      "_msret"));
1124    if (ClTrackOrigins)
1125      setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
1126  }
1127
  /// \brief Store the returned value's shadow (and origin) into the retval
  /// TLS slot for the caller to pick up.
  void visitReturnInst(ReturnInst &I) {
    IRBuilder<> IRB(&I);
    if (Value *RetVal = I.getReturnValue()) {
      // Set the shadow for the RetVal.
      Value *Shadow = getShadow(RetVal);
      Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
      DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
      IRB.CreateStore(Shadow, ShadowPtr);
      if (ClTrackOrigins)
        IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
    }
  }
1140
  /// \brief Create matching shadow (and origin) PHI nodes.
  ///
  /// Incoming values are not filled in here: the PHI is recorded in
  /// ShadowPHINodes and completed later, once shadows exist for all
  /// incoming values.
  void visitPHINode(PHINode &I) {
    IRBuilder<> IRB(&I);
    ShadowPHINodes.push_back(&I);
    setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
                                "_msphi_s"));
    if (ClTrackOrigins)
      setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
                                  "_msphi_o"));
  }
1150
  /// \brief Poison (or unpoison) freshly allocated stack memory.
  ///
  /// The alloca's own result (the pointer) is always fully defined. The
  /// allocated bytes are poisoned either with an inline memset of the
  /// shadow or via a runtime call, depending on -msan-poison-stack-with-call.
  void visitAllocaInst(AllocaInst &I) {
    setShadow(&I, getCleanShadow(&I));
    if (!ClPoisonStack) return;
    // Instrumentation goes after the alloca so the address is available.
    IRBuilder<> IRB(I.getNextNode());
    uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
    if (ClPoisonStackWithCall) {
      IRB.CreateCall2(MS.MsanPoisonStackFn,
                      IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
                      ConstantInt::get(MS.IntptrTy, Size));
    } else {
      Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
      IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern),
                       Size, I.getAlignment());
    }

    if (ClTrackOrigins) {
      setOrigin(&I, getCleanOrigin());
      SmallString<2048> StackDescriptionStorage;
      raw_svector_ostream StackDescription(StackDescriptionStorage);
      // We create a string with a description of the stack allocation and
      // pass it into __msan_set_alloca_origin.
      // It will be printed by the run-time if stack-originated UMR is found.
      // The first 4 bytes of the string are set to '----' and will be
      // replaced at the first call. NOTE(review): the original comment
      // attributed the replacement to __msan_va_arg_overflow_size_tls, which
      // looks like a copy-paste error -- presumably the runtime's
      // __msan_set_alloca_origin overwrites them; verify against the runtime.
      StackDescription << "----" << I.getName() << "@" << F.getName();
      Value *Descr =
          createPrivateNonConstGlobalForString(*F.getParent(),
                                               StackDescription.str());
      IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
                      IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
                      ConstantInt::get(MS.IntptrTy, Size),
                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
    }
  }
1185
  /// \brief Select propagates the shadow (and origin) of whichever operand
  /// is selected at run time.
  ///
  /// NOTE(review): the condition's own shadow is not folded into the result
  /// here -- a select on a poisoned condition yields an unpoisoned result.
  /// Confirm whether that is intentional.
  void visitSelectInst(SelectInst& I) {
    IRBuilder<> IRB(&I);
    setShadow(&I,  IRB.CreateSelect(I.getCondition(),
              getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
              "_msprop"));
    if (ClTrackOrigins)
      setOrigin(&I, IRB.CreateSelect(I.getCondition(),
                getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
  }
1195
  /// \brief Landing pads produce runtime-supplied values; treat the result
  /// as fully initialized.
  void visitLandingPadInst(LandingPadInst &I) {
    // Do nothing.
    // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1
    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
  }
1202
  /// \brief A GEP result is poisoned if the base pointer or any index is
  /// poisoned: OR all operand shadows.
  void visitGetElementPtrInst(GetElementPtrInst &I) {
    handleShadowOr(I);
  }
1206
  /// \brief Extract the matching member from the aggregate's shadow.
  ///
  /// Origin is set to clean here (aggregate origins are not tracked).
  void visitExtractValueInst(ExtractValueInst &I) {
    IRBuilder<> IRB(&I);
    Value *Agg = I.getAggregateOperand();
    DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
    Value *AggShadow = getShadow(Agg);
    DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
    Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
    DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
    setShadow(&I, ResShadow);
    setOrigin(&I, getCleanOrigin());
  }
1218
  /// \brief Insert the inserted value's shadow into the aggregate's shadow
  /// at the same indices.
  ///
  /// Origin is set to clean here (aggregate origins are not tracked).
  void visitInsertValueInst(InsertValueInst &I) {
    IRBuilder<> IRB(&I);
    DEBUG(dbgs() << "InsertValue:  " << I << "\n");
    Value *AggShadow = getShadow(I.getAggregateOperand());
    Value *InsShadow = getShadow(I.getInsertedValueOperand());
    DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
    DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
    Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
    DEBUG(dbgs() << "   Res:        " << *Res << "\n");
    setShadow(&I, Res);
    setOrigin(&I, getCleanOrigin());
  }
1231
1232  void dumpInst(Instruction &I) {
1233    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1234      errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
1235    } else {
1236      errs() << "ZZZ " << I.getOpcodeName() << "\n";
1237    }
1238    errs() << "QQQ " << I << "\n";
1239  }
1240
  /// \brief resume consumes an already-instrumented landingpad value;
  /// no shadow handling is needed.
  void visitResumeInst(ResumeInst &I) {
    DEBUG(dbgs() << "Resume: " << I << "\n");
    // Nothing to do here.
  }
1245
1246  void visitInstruction(Instruction &I) {
1247    // Everything else: stop propagating and check for poisoned shadow.
1248    if (ClDumpStrictInstructions)
1249      dumpInst(I);
1250    DEBUG(dbgs() << "DEFAULT: " << I << "\n");
1251    for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
1252      insertCheck(I.getOperand(i), &I);
1253    setShadow(&I, getCleanShadow(&I));
1254    setOrigin(&I, getCleanOrigin());
1255  }
1256};
1257
1258/// \brief AMD64-specific implementation of VarArgHelper.
1259struct VarArgAMD64Helper : public VarArgHelper {
1260  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
1261  // See a comment in visitCallSite for more details.
1262  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
1263  static const unsigned AMD64FpEndOffset = 176;
1264
1265  Function &F;
1266  MemorySanitizer &MS;
1267  MemorySanitizerVisitor &MSV;
1268  Value *VAArgTLSCopy;
1269  Value *VAArgOverflowSize;
1270
1271  SmallVector<CallInst*, 16> VAStartInstrumentationList;
1272
1273  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
1274                    MemorySanitizerVisitor &MSV)
1275    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
1276
1277  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
1278
1279  ArgKind classifyArgument(Value* arg) {
1280    // A very rough approximation of X86_64 argument classification rules.
1281    Type *T = arg->getType();
1282    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
1283      return AK_FloatingPoint;
1284    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
1285      return AK_GeneralPurpose;
1286    if (T->isPointerTy())
1287      return AK_GeneralPurpose;
1288    return AK_Memory;
1289  }
1290
1291  // For VarArg functions, store the argument shadow in an ABI-specific format
1292  // that corresponds to va_list layout.
1293  // We do this because Clang lowers va_arg in the frontend, and this pass
1294  // only sees the low level code that deals with va_list internals.
1295  // A much easier alternative (provided that Clang emits va_arg instructions)
1296  // would have been to associate each live instance of va_list with a copy of
1297  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
1298  // order.
1299  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {
1300    unsigned GpOffset = 0;
1301    unsigned FpOffset = AMD64GpEndOffset;
1302    unsigned OverflowOffset = AMD64FpEndOffset;
1303    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
1304         ArgIt != End; ++ArgIt) {
1305      Value *A = *ArgIt;
1306      ArgKind AK = classifyArgument(A);
1307      if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
1308        AK = AK_Memory;
1309      if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
1310        AK = AK_Memory;
1311      Value *Base;
1312      switch (AK) {
1313      case AK_GeneralPurpose:
1314        Base = getShadowPtrForVAArgument(A, IRB, GpOffset);
1315        GpOffset += 8;
1316        break;
1317      case AK_FloatingPoint:
1318        Base = getShadowPtrForVAArgument(A, IRB, FpOffset);
1319        FpOffset += 16;
1320        break;
1321      case AK_Memory:
1322        uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType());
1323        Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
1324        OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
1325      }
1326      IRB.CreateStore(MSV.getShadow(A), Base);
1327    }
1328    Constant *OverflowSize =
1329      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
1330    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
1331  }
1332
1333  /// \brief Compute the shadow address for a given va_arg.
1334  Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB,
1335                                   int ArgOffset) {
1336    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
1337    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1338    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0),
1339                              "_msarg");
1340  }
1341
1342  void visitVAStartInst(VAStartInst &I) {
1343    IRBuilder<> IRB(&I);
1344    VAStartInstrumentationList.push_back(&I);
1345    Value *VAListTag = I.getArgOperand(0);
1346    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
1347
1348    // Unpoison the whole __va_list_tag.
1349    // FIXME: magic ABI constants.
1350    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
1351                     /* size */24, /* alignment */16, false);
1352  }
1353
1354  void visitVACopyInst(VACopyInst &I) {
1355    IRBuilder<> IRB(&I);
1356    Value *VAListTag = I.getArgOperand(0);
1357    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
1358
1359    // Unpoison the whole __va_list_tag.
1360    // FIXME: magic ABI constants.
1361    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
1362                     /* size */ 24, /* alignment */ 16, false);
1363  }
1364
1365  void finalizeInstrumentation() {
1366    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
1367           "finalizeInstrumentation called twice");
1368    if (!VAStartInstrumentationList.empty()) {
1369      // If there is a va_start in this function, make a backup copy of
1370      // va_arg_tls somewhere in the function entry block.
1371      IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
1372      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
1373      Value *CopySize =
1374        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
1375                      VAArgOverflowSize);
1376      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
1377      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
1378    }
1379
1380    // Instrument va_start.
1381    // Copy va_list shadow from the backup copy of the TLS contents.
1382    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
1383      CallInst *OrigInst = VAStartInstrumentationList[i];
1384      IRBuilder<> IRB(OrigInst->getNextNode());
1385      Value *VAListTag = OrigInst->getArgOperand(0);
1386
1387      Value *RegSaveAreaPtrPtr =
1388        IRB.CreateIntToPtr(
1389          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
1390                        ConstantInt::get(MS.IntptrTy, 16)),
1391          Type::getInt64PtrTy(*MS.C));
1392      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
1393      Value *RegSaveAreaShadowPtr =
1394        MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
1395      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy,
1396                       AMD64FpEndOffset, 16);
1397
1398      Value *OverflowArgAreaPtrPtr =
1399        IRB.CreateIntToPtr(
1400          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
1401                        ConstantInt::get(MS.IntptrTy, 8)),
1402          Type::getInt64PtrTy(*MS.C));
1403      Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
1404      Value *OverflowArgAreaShadowPtr =
1405        MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
1406      Value *SrcPtr =
1407        getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
1408      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
1409    }
1410  }
1411};
1412
/// \brief Construct the platform-specific VarArgHelper.
/// Only the AMD64 flavor exists at the moment.
VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
                                 MemorySanitizerVisitor &Visitor) {
  return new VarArgAMD64Helper(Func, Msan, Visitor);
}
1417
1418}  // namespace
1419
/// \brief Pass entry point: instrument a single function.
bool MemorySanitizer::runOnFunction(Function &F) {
  MemorySanitizerVisitor Visitor(F, *this);

  // Clear out readonly/readnone attributes: the instrumented function now
  // reads and writes shadow/origin state, so keeping these attributes would
  // let later passes optimize the instrumentation away.
  AttrBuilder B;
  B.addAttribute(Attributes::ReadOnly)
    .addAttribute(Attributes::ReadNone);
  F.removeAttribute(AttrListPtr::FunctionIndex,
                    Attributes::get(F.getContext(), B));

  return Visitor.runOnFunction();
}
1432