1//===- subzero/src/IceASanInstrumentation.cpp - ASan ------------*- C++ -*-===//
2//
3//                        The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Implements the AddressSanitizer instrumentation class.
12///
13//===----------------------------------------------------------------------===//
14
15#include "IceASanInstrumentation.h"
16
17#include "IceBuildDefs.h"
18#include "IceCfg.h"
19#include "IceCfgNode.h"
20#include "IceGlobalInits.h"
21#include "IceInst.h"
22#include "IceTargetLowering.h"
23#include "IceTypes.h"
24
25#include <sstream>
26#include <unordered_map>
27#include <unordered_set>
28#include <vector>
29
30namespace Ice {
31
32namespace {
33
// Number of bytes in one 32-bit word. instrumentFuncStart packs this many
// shadow values into each poisoning store.
constexpr SizeT BytesPerWord = sizeof(uint32_t);
// Base size in bytes of the redzones placed next to globals and locals.
constexpr SizeT RzSize = 32;
// Addresses and sizes are shifted right by this amount to obtain shadow
// indices and shadow entry counts.
constexpr SizeT ShadowScaleLog2 = 3;
// 1 << ShadowScaleLog2; a variable's size modulo this value is used as the
// shadow value of its last, partially covered shadow entry.
constexpr SizeT ShadowScale = 1 << ShadowScaleLog2;
// Constant added to a shadow index (address >> ShadowScaleLog2) to form the
// address at which the shadow value is stored.
constexpr SizeT ShadowLength32 = 1 << (32 - ShadowScaleLog2);
// Shadow value recorded for redzone bytes of local variables. Only the low
// eight bits are kept when the value is packed into a poisoning store.
constexpr int32_t StackPoisonVal = -1;
// Functions whose names start with this prefix are not instrumented.
constexpr const char *ASanPrefix = "__asan";
// Prefix of the names generated by nextRzName() for redzones.
constexpr const char *RzPrefix = "__$rz";
// Name of the global holding relocations to every global redzone.
constexpr const char *RzArrayName = "__$rz_array";
// Name of the global holding the size of every global redzone.
constexpr const char *RzSizesName = "__$rz_sizes";
// RzSize copies of the byte 'R'.
// NOTE(review): not referenced anywhere in the visible part of this file.
const llvm::NaClBitcodeRecord::RecordVector RzContents =
    llvm::NaClBitcodeRecord::RecordVector(RzSize, 'R');
46
// In order to instrument the code correctly, the .pexe must not have had its
// symbols stripped.
using StringMap = std::unordered_map<std::string, std::string>;
using StringSet = std::unordered_set<std::string>;
// TODO(tlively): Handle all allocation functions
// Maps the name of a function to the name of the function that replaces it.
// Consulted by instrumentGlobals (function relocations in global
// initializers), instrumentCall (direct call targets) and instrumentReloc.
const StringMap FuncSubstitutions = {{"malloc", "__asan_malloc"},
                                     {"free", "__asan_free"},
                                     {"calloc", "__asan_calloc"},
                                     {"__asan_dummy_calloc", "__asan_calloc"},
                                     {"realloc", "__asan_realloc"}};
// Names of functions that isInstrumentable() rejects.
const StringSet FuncBlackList = {"_Balloc"};
58
59llvm::NaClBitcodeRecord::RecordVector sizeToByteVec(SizeT Size) {
60  llvm::NaClBitcodeRecord::RecordVector SizeContents;
61  for (unsigned i = 0; i < sizeof(Size); ++i) {
62    SizeContents.emplace_back(Size % (1 << CHAR_BIT));
63    Size >>= CHAR_BIT;
64  }
65  return SizeContents;
66}
67
68} // end of anonymous namespace
69
// Definitions of the ASanInstrumentation fields accessed through
// ICE_TLS_GET_FIELD / ICE_TLS_SET_FIELD.
// NOTE(review): presumably thread-local storage, one copy per translation
// thread -- confirm against the ICE_TLS_* macro definitions.

// Maps the destination of each instrumented alloca to the variable's size.
ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, LocalVars);
// Stores that zero the shadow words poisoned by instrumentFuncStart; replayed
// by instrumentRet before each return.
ICE_TLS_DEFINE_FIELD(std::vector<InstStore *> *, ASanInstrumentation,
                     LocalDtors);
// Node for which CheckedVars is currently valid (see instrumentAccess).
ICE_TLS_DEFINE_FIELD(CfgNode *, ASanInstrumentation, CurNode);
// Operands already handled by instrumentAccess in CurNode, with the access
// size recorded for each.
ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, CheckedVars);
75
76bool ASanInstrumentation::isInstrumentable(Cfg *Func) {
77  std::string FuncName = Func->getFunctionName().toStringOrEmpty();
78  return FuncName == "" ||
79         (FuncBlackList.count(FuncName) == 0 && FuncName.find(ASanPrefix) != 0);
80}
81
// Create redzones around all global variables, ensuring that the initializer
// types of the redzones and their associated globals match so that they are
// laid out together in memory.
//
// The work is done at most once: the function holds GlobalsMutex and returns
// immediately if DidProcessGlobals is already set. On completion Globals has
// been replaced by a list that starts with the redzone pointer array and the
// redzone size array, followed by a (left redzone, global, right redzone)
// triple for every original global. RzGlobalsNum and GlobalSizes are updated
// as a side effect.
void ASanInstrumentation::instrumentGlobals(VariableDeclarationList &Globals) {
  std::unique_lock<std::mutex> _(GlobalsMutex);
  if (DidProcessGlobals)
    return;
  VariableDeclarationList NewGlobals;
  // Global holding pointers to all redzones
  auto *RzArray = VariableDeclaration::create(&NewGlobals);
  // Global holding sizes of all redzones
  auto *RzSizes = VariableDeclaration::create(&NewGlobals);

  RzArray->setName(Ctx, RzArrayName);
  RzSizes->setName(Ctx, RzSizesName);
  RzArray->setIsConstant(true);
  RzSizes->setIsConstant(true);
  NewGlobals.push_back(RzArray);
  NewGlobals.push_back(RzSizes);

  // Cache of the prototypes created for substituted functions, keyed by the
  // substitute's name, so each prototype is created only once.
  using PrototypeMap = std::unordered_map<std::string, FunctionDeclaration *>;
  PrototypeMap ProtoSubstitutions;
  for (VariableDeclaration *Global : Globals) {
    assert(Global->getAlignment() <= RzSize);
    VariableDeclaration *RzLeft = VariableDeclaration::create(&NewGlobals);
    // Reused as-is unless the global has a nonzero initializer (see below).
    VariableDeclaration *NewGlobal = Global;
    VariableDeclaration *RzRight = VariableDeclaration::create(&NewGlobals);
    RzLeft->setName(Ctx, nextRzName());
    RzRight->setName(Ctx, nextRzName());
    SizeT Alignment = std::max(RzSize, Global->getAlignment());
    SizeT RzLeftSize = Alignment;
    // The right redzone also absorbs the padding needed to round the global's
    // size up to a multiple of Alignment.
    SizeT RzRightSize =
        RzSize + Utils::OffsetToAlignment(Global->getNumBytes(), Alignment);
    if (!Global->hasNonzeroInitializer()) {
      // Zero-initialized global: give it zero-initialized redzones.
      RzLeft->addInitializer(VariableDeclaration::ZeroInitializer::create(
          &NewGlobals, RzLeftSize));
      RzRight->addInitializer(VariableDeclaration::ZeroInitializer::create(
          &NewGlobals, RzRightSize));
    } else {
      // Global with data: give it redzones filled with 'R' bytes.
      RzLeft->addInitializer(VariableDeclaration::DataInitializer::create(
          &NewGlobals, llvm::NaClBitcodeRecord::RecordVector(RzLeftSize, 'R')));
      RzRight->addInitializer(VariableDeclaration::DataInitializer::create(
          &NewGlobals,
          llvm::NaClBitcodeRecord::RecordVector(RzRightSize, 'R')));

      // replace any pointers to allocator functions
      // A fresh declaration with the same name is built; every initializer is
      // carried over unchanged except relocations that target a function
      // listed in FuncSubstitutions.
      NewGlobal = VariableDeclaration::create(&NewGlobals);
      NewGlobal->setName(Global->getName());
      std::vector<VariableDeclaration::Initializer *> GlobalInits =
          Global->getInitializers();
      for (VariableDeclaration::Initializer *Init : GlobalInits) {
        auto *RelocInit =
            llvm::dyn_cast<VariableDeclaration::RelocInitializer>(Init);
        if (RelocInit == nullptr) {
          // Not a relocation: keep as is.
          NewGlobal->addInitializer(Init);
          continue;
        }
        const GlobalDeclaration *TargetDecl = RelocInit->getDeclaration();
        const auto *TargetFunc =
            llvm::dyn_cast<FunctionDeclaration>(TargetDecl);
        if (TargetFunc == nullptr) {
          // Relocation to something other than a function: keep as is.
          NewGlobal->addInitializer(Init);
          continue;
        }
        std::string TargetName = TargetDecl->getName().toStringOrEmpty();
        StringMap::const_iterator Subst = FuncSubstitutions.find(TargetName);
        if (Subst == FuncSubstitutions.end()) {
          // Function without a substitute: keep as is.
          NewGlobal->addInitializer(Init);
          continue;
        }
        std::string SubstName = Subst->second;
        PrototypeMap::iterator SubstProtoEntry =
            ProtoSubstitutions.find(SubstName);
        FunctionDeclaration *SubstProto;
        if (SubstProtoEntry != ProtoSubstitutions.end())
          SubstProto = SubstProtoEntry->second;
        else {
          // First use of this substitute: declare an external prototype with
          // the signature and calling convention of the replaced function.
          constexpr bool IsProto = true;
          SubstProto = FunctionDeclaration::create(
              Ctx, TargetFunc->getSignature(), TargetFunc->getCallingConv(),
              llvm::GlobalValue::ExternalLinkage, IsProto);
          SubstProto->setName(Ctx, SubstName);
          ProtoSubstitutions.insert({SubstName, SubstProto});
        }

        // Point the relocation at the substitute instead.
        NewGlobal->addInitializer(VariableDeclaration::RelocInitializer::create(
            &NewGlobals, SubstProto, RelocOffsetArray(0)));
      }
    }

    // The redzones take the constness of the global they surround.
    RzLeft->setIsConstant(Global->getIsConstant());
    NewGlobal->setIsConstant(Global->getIsConstant());
    RzRight->setIsConstant(Global->getIsConstant());
    RzLeft->setAlignment(Alignment);
    NewGlobal->setAlignment(Alignment);
    RzRight->setAlignment(1);
    // Record both redzones: one relocation and one size entry each.
    RzArray->addInitializer(VariableDeclaration::RelocInitializer::create(
        &NewGlobals, RzLeft, RelocOffsetArray(0)));
    RzArray->addInitializer(VariableDeclaration::RelocInitializer::create(
        &NewGlobals, RzRight, RelocOffsetArray(0)));
    RzSizes->addInitializer(VariableDeclaration::DataInitializer::create(
        &NewGlobals, sizeToByteVec(RzLeftSize)));
    RzSizes->addInitializer(VariableDeclaration::DataInitializer::create(
        &NewGlobals, sizeToByteVec(RzRightSize)));

    // Emit the triple in left redzone, global, right redzone order.
    NewGlobals.push_back(RzLeft);
    NewGlobals.push_back(NewGlobal);
    NewGlobals.push_back(RzRight);
    RzGlobalsNum += 2;

    // Remember the global's size for isOkGlobalAccess.
    GlobalSizes.insert({NewGlobal->getName(), NewGlobal->getNumBytes()});
  }

  // Replace old list of globals, without messing up arena allocators
  Globals.clear();
  Globals.merge(&NewGlobals);
  DidProcessGlobals = true;

  // Log the new set of globals
  if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit)) {
    OstreamLocker _(Ctx);
    Ctx->getStrDump() << "========= Instrumented Globals =========\n";
    for (VariableDeclaration *Global : Globals) {
      Global->dump(Ctx->getStrDump());
    }
  }
}
209
210std::string ASanInstrumentation::nextRzName() {
211  std::stringstream Name;
212  Name << RzPrefix << RzNum++;
213  return Name.str();
214}
215
216// Check for an alloca signaling the presence of local variables and add a
217// redzone if it is found
218void ASanInstrumentation::instrumentFuncStart(LoweringContext &Context) {
219  if (ICE_TLS_GET_FIELD(LocalDtors) == nullptr) {
220    ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstStore *>());
221    ICE_TLS_SET_FIELD(LocalVars, new VarSizeMap());
222  }
223  Cfg *Func = Context.getNode()->getCfg();
224  using Entry = std::pair<SizeT, int32_t>;
225  std::vector<InstAlloca *> NewAllocas;
226  std::vector<Entry> PoisonVals;
227  Variable *FirstShadowLocVar;
228  InstArithmetic *ShadowIndexCalc;
229  InstArithmetic *ShadowLocCalc;
230  InstAlloca *Cur;
231  ConstantInteger32 *VarSizeOp;
232  while (!Context.atEnd()) {
233    Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(Context.getCur()));
234    VarSizeOp = (Cur == nullptr)
235                    ? nullptr
236                    : llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes());
237    if (Cur == nullptr || VarSizeOp == nullptr) {
238      Context.advanceCur();
239      Context.advanceNext();
240      continue;
241    }
242
243    Cur->setDeleted();
244
245    if (PoisonVals.empty()) {
246      // insert leftmost redzone
247      auto *LastRzVar = Func->makeVariable(IceType_i32);
248      LastRzVar->setName(Func, nextRzName());
249      auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize);
250      constexpr SizeT Alignment = 8;
251      NewAllocas.emplace_back(
252          InstAlloca::create(Func, LastRzVar, ByteCount, Alignment));
253      PoisonVals.emplace_back(Entry{RzSize >> ShadowScaleLog2, StackPoisonVal});
254
255      // Calculate starting address for poisoning
256      FirstShadowLocVar = Func->makeVariable(IceType_i32);
257      FirstShadowLocVar->setName(Func, "firstShadowLoc");
258      auto *ShadowIndexVar = Func->makeVariable(IceType_i32);
259      ShadowIndexVar->setName(Func, "shadowIndex");
260
261      auto *ShadowScaleLog2Const =
262          ConstantInteger32::create(Ctx, IceType_i32, ShadowScaleLog2);
263      auto *ShadowMemLocConst =
264          ConstantInteger32::create(Ctx, IceType_i32, ShadowLength32);
265
266      ShadowIndexCalc =
267          InstArithmetic::create(Func, InstArithmetic::Lshr, ShadowIndexVar,
268                                 LastRzVar, ShadowScaleLog2Const);
269      ShadowLocCalc =
270          InstArithmetic::create(Func, InstArithmetic::Add, FirstShadowLocVar,
271                                 ShadowIndexVar, ShadowMemLocConst);
272    }
273
274    // create the new alloca that includes a redzone
275    SizeT VarSize = VarSizeOp->getValue();
276    Variable *Dest = Cur->getDest();
277    ICE_TLS_GET_FIELD(LocalVars)->insert({Dest, VarSize});
278    SizeT RzPadding = RzSize + Utils::OffsetToAlignment(VarSize, RzSize);
279    auto *ByteCount =
280        ConstantInteger32::create(Ctx, IceType_i32, VarSize + RzPadding);
281    constexpr SizeT Alignment = 8;
282    NewAllocas.emplace_back(
283        InstAlloca::create(Func, Dest, ByteCount, Alignment));
284
285    const SizeT Zeros = VarSize >> ShadowScaleLog2;
286    const SizeT Offset = VarSize % ShadowScale;
287    const SizeT PoisonBytes =
288        ((VarSize + RzPadding) >> ShadowScaleLog2) - Zeros - 1;
289    if (Zeros > 0)
290      PoisonVals.emplace_back(Entry{Zeros, 0});
291    PoisonVals.emplace_back(Entry{1, (Offset == 0) ? StackPoisonVal : Offset});
292    PoisonVals.emplace_back(Entry{PoisonBytes, StackPoisonVal});
293    Context.advanceCur();
294    Context.advanceNext();
295  }
296
297  Context.rewind();
298  if (PoisonVals.empty()) {
299    Context.advanceNext();
300    return;
301  }
302  for (InstAlloca *RzAlloca : NewAllocas) {
303    Context.insert(RzAlloca);
304  }
305  Context.insert(ShadowIndexCalc);
306  Context.insert(ShadowLocCalc);
307
308  // Poison redzones
309  std::vector<Entry>::iterator Iter = PoisonVals.begin();
310  for (SizeT Offset = 0; Iter != PoisonVals.end(); Offset += BytesPerWord) {
311    int32_t CurVals[BytesPerWord] = {0};
312    for (uint32_t i = 0; i < BytesPerWord; ++i) {
313      if (Iter == PoisonVals.end())
314        break;
315      Entry Val = *Iter;
316      CurVals[i] = Val.second;
317      --Val.first;
318      if (Val.first > 0)
319        *Iter = Val;
320      else
321        ++Iter;
322    }
323    int32_t Poison = ((CurVals[3] & 0xff) << 24) | ((CurVals[2] & 0xff) << 16) |
324                     ((CurVals[1] & 0xff) << 8) | (CurVals[0] & 0xff);
325    if (Poison == 0)
326      continue;
327    auto *PoisonConst = ConstantInteger32::create(Ctx, IceType_i32, Poison);
328    auto *ZeroConst = ConstantInteger32::create(Ctx, IceType_i32, 0);
329    auto *OffsetConst = ConstantInteger32::create(Ctx, IceType_i32, Offset);
330    auto *PoisonAddrVar = Func->makeVariable(IceType_i32);
331    Context.insert(InstArithmetic::create(Func, InstArithmetic::Add,
332                                          PoisonAddrVar, FirstShadowLocVar,
333                                          OffsetConst));
334    Context.insert(InstStore::create(Func, PoisonConst, PoisonAddrVar));
335    ICE_TLS_GET_FIELD(LocalDtors)
336        ->emplace_back(InstStore::create(Func, ZeroConst, PoisonAddrVar));
337  }
338  Context.advanceNext();
339}
340
341void ASanInstrumentation::instrumentCall(LoweringContext &Context,
342                                         InstCall *Instr) {
343  auto *CallTarget =
344      llvm::dyn_cast<ConstantRelocatable>(Instr->getCallTarget());
345  if (CallTarget == nullptr)
346    return;
347
348  std::string TargetName = CallTarget->getName().toStringOrEmpty();
349  auto Subst = FuncSubstitutions.find(TargetName);
350  if (Subst == FuncSubstitutions.end())
351    return;
352
353  std::string SubName = Subst->second;
354  Constant *NewFunc = Ctx->getConstantExternSym(Ctx->getGlobalString(SubName));
355  auto *NewCall =
356      InstCall::create(Context.getNode()->getCfg(), Instr->getNumArgs(),
357                       Instr->getDest(), NewFunc, Instr->isTailcall());
358  for (SizeT I = 0, Args = Instr->getNumArgs(); I < Args; ++I)
359    NewCall->addArg(Instr->getArg(I));
360  Context.insert(NewCall);
361  Instr->setDeleted();
362}
363
364void ASanInstrumentation::instrumentLoad(LoweringContext &Context,
365                                         InstLoad *Instr) {
366  Operand *Src = Instr->getSourceAddress();
367  if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
368    auto *NewLoad = InstLoad::create(Context.getNode()->getCfg(),
369                                     Instr->getDest(), instrumentReloc(Reloc));
370    Instr->setDeleted();
371    Context.insert(NewLoad);
372    Instr = NewLoad;
373  }
374  Constant *Func =
375      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_load"));
376  instrumentAccess(Context, Instr->getSourceAddress(),
377                   typeWidthInBytes(Instr->getDest()->getType()), Func);
378}
379
380void ASanInstrumentation::instrumentStore(LoweringContext &Context,
381                                          InstStore *Instr) {
382  Operand *Data = Instr->getData();
383  if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Data)) {
384    auto *NewStore = InstStore::create(
385        Context.getNode()->getCfg(), instrumentReloc(Reloc), Instr->getAddr());
386    Instr->setDeleted();
387    Context.insert(NewStore);
388    Instr = NewStore;
389  }
390  Constant *Func =
391      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_store"));
392  instrumentAccess(Context, Instr->getAddr(),
393                   typeWidthInBytes(Instr->getData()->getType()), Func);
394}
395
396ConstantRelocatable *
397ASanInstrumentation::instrumentReloc(ConstantRelocatable *Reloc) {
398  std::string DataName = Reloc->getName().toString();
399  StringMap::const_iterator DataSub = FuncSubstitutions.find(DataName);
400  if (DataSub != FuncSubstitutions.end()) {
401    return ConstantRelocatable::create(
402        Ctx, Reloc->getType(),
403        RelocatableTuple(Reloc->getOffset(), RelocOffsetArray(0),
404                         Ctx->getGlobalString(DataSub->second),
405                         Reloc->getEmitString()));
406  }
407  return Reloc;
408}
409
410void ASanInstrumentation::instrumentAccess(LoweringContext &Context,
411                                           Operand *Op, SizeT Size,
412                                           Constant *CheckFunc) {
413  // Skip redundant checks within basic blocks
414  VarSizeMap *Checked = ICE_TLS_GET_FIELD(CheckedVars);
415  if (ICE_TLS_GET_FIELD(CurNode) != Context.getNode()) {
416    ICE_TLS_SET_FIELD(CurNode, Context.getNode());
417    if (Checked == NULL) {
418      Checked = new VarSizeMap();
419      ICE_TLS_SET_FIELD(CheckedVars, Checked);
420    }
421    Checked->clear();
422  }
423  VarSizeMap::iterator PrevCheck = Checked->find(Op);
424  if (PrevCheck != Checked->end() && PrevCheck->second >= Size)
425    return;
426  else
427    Checked->insert({Op, Size});
428
429  // check for known good local access
430  VarSizeMap::iterator LocalSize = ICE_TLS_GET_FIELD(LocalVars)->find(Op);
431  if (LocalSize != ICE_TLS_GET_FIELD(LocalVars)->end() &&
432      LocalSize->second >= Size)
433    return;
434  if (isOkGlobalAccess(Op, Size))
435    return;
436  constexpr SizeT NumArgs = 2;
437  constexpr Variable *Void = nullptr;
438  constexpr bool NoTailCall = false;
439  auto *Call = InstCall::create(Context.getNode()->getCfg(), NumArgs, Void,
440                                CheckFunc, NoTailCall);
441  Call->addArg(Op);
442  Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, Size));
443  // play games to insert the call before the access instruction
444  InstList::iterator Next = Context.getNext();
445  Context.setInsertPoint(Context.getCur());
446  Context.insert(Call);
447  Context.setNext(Next);
448}
449
450// TODO(tlively): Trace back load and store addresses to find their real offsets
451bool ASanInstrumentation::isOkGlobalAccess(Operand *Op, SizeT Size) {
452  auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Op);
453  if (Reloc == nullptr)
454    return false;
455  RelocOffsetT Offset = Reloc->getOffset();
456  GlobalSizeMap::iterator GlobalSize = GlobalSizes.find(Reloc->getName());
457  return GlobalSize != GlobalSizes.end() && GlobalSize->second - Offset >= Size;
458}
459
460void ASanInstrumentation::instrumentRet(LoweringContext &Context, InstRet *) {
461  Cfg *Func = Context.getNode()->getCfg();
462  Context.setInsertPoint(Context.getCur());
463  for (InstStore *RzUnpoison : *ICE_TLS_GET_FIELD(LocalDtors)) {
464    Context.insert(
465        InstStore::create(Func, RzUnpoison->getData(), RzUnpoison->getAddr()));
466  }
467  Context.advanceCur();
468  Context.advanceNext();
469}
470
471void ASanInstrumentation::instrumentStart(Cfg *Func) {
472  Constant *ShadowMemInit =
473      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_init"));
474  constexpr SizeT NumArgs = 3;
475  constexpr Variable *Void = nullptr;
476  constexpr bool NoTailCall = false;
477  auto *Call = InstCall::create(Func, NumArgs, Void, ShadowMemInit, NoTailCall);
478  Func->getEntryNode()->getInsts().push_front(Call);
479
480  instrumentGlobals(*getGlobals());
481
482  Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, RzGlobalsNum));
483  Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzArrayName)));
484  Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzSizesName)));
485}
486
487// TODO(tlively): make this more efficient with swap idiom
488void ASanInstrumentation::finishFunc(Cfg *) {
489  ICE_TLS_GET_FIELD(LocalVars)->clear();
490  ICE_TLS_GET_FIELD(LocalDtors)->clear();
491}
492
493} // end of namespace Ice
494