1//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the custom lowering code required by the shadow-stack GC
11// strategy.
12//
13// This pass implements the code transformation described in this paper:
14//   "Accurate Garbage Collection in an Uncooperative Environment"
15//   Fergus Henderson, ISMM, 2002
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/CodeGen/Passes.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/CodeGen/GCStrategy.h"
22#include "llvm/IR/CallSite.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/IntrinsicInst.h"
25#include "llvm/IR/Module.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "shadowstackgclowering"
30
31namespace {
32
33class ShadowStackGCLowering : public FunctionPass {
34  /// RootChain - This is the global linked-list that contains the chain of GC
35  /// roots.
36  GlobalVariable *Head;
37
38  /// StackEntryTy - Abstract type of a link in the shadow stack.
39  ///
40  StructType *StackEntryTy;
41  StructType *FrameMapTy;
42
43  /// Roots - GC roots in the current function. Each is a pair of the
44  /// intrinsic call and its corresponding alloca.
45  std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
46
47public:
48  static char ID;
49  ShadowStackGCLowering();
50
51  bool doInitialization(Module &M) override;
52  bool runOnFunction(Function &F) override;
53
54private:
55  bool IsNullValue(Value *V);
56  Constant *GetFrameMap(Function &F);
57  Type *GetConcreteStackEntryType(Function &F);
58  void CollectRoots(Function &F);
59  static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
60                                      Type *Ty, Value *BasePtr, int Idx1,
61                                      const char *Name);
62  static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
63                                      Type *Ty, Value *BasePtr, int Idx1, int Idx2,
64                                      const char *Name);
65};
66}
67
68INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
69                      "Shadow Stack GC Lowering", false, false)
70INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
71INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering",
72                    "Shadow Stack GC Lowering", false, false)
73
74FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
75
76char ShadowStackGCLowering::ID = 0;
77
78ShadowStackGCLowering::ShadowStackGCLowering()
79  : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr),
80    FrameMapTy(nullptr) {
81  initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
82}
83
84namespace {
85/// EscapeEnumerator - This is a little algorithm to find all escape points
86/// from a function so that "finally"-style code can be inserted. In addition
87/// to finding the existing return and unwind instructions, it also (if
88/// necessary) transforms any call instructions into invokes and sends them to
89/// a landing pad.
90///
91/// It's wrapped up in a state machine using the same transform C# uses for
92/// 'yield return' enumerators, This transform allows it to be non-allocating.
93class EscapeEnumerator {
94  Function &F;
95  const char *CleanupBBName;
96
97  // State.
98  int State;
99  Function::iterator StateBB, StateE;
100  IRBuilder<> Builder;
101
102public:
103  EscapeEnumerator(Function &F, const char *N = "cleanup")
104      : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
105
106  IRBuilder<> *Next() {
107    switch (State) {
108    default:
109      return nullptr;
110
111    case 0:
112      StateBB = F.begin();
113      StateE = F.end();
114      State = 1;
115
116    case 1:
117      // Find all 'return', 'resume', and 'unwind' instructions.
118      while (StateBB != StateE) {
119        BasicBlock *CurBB = &*StateBB++;
120
121        // Branches and invokes do not escape, only unwind, resume, and return
122        // do.
123        TerminatorInst *TI = CurBB->getTerminator();
124        if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
125          continue;
126
127        Builder.SetInsertPoint(TI);
128        return &Builder;
129      }
130
131      State = 2;
132
133      // Find all 'call' instructions.
134      SmallVector<Instruction *, 16> Calls;
135      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
136        for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE;
137             ++II)
138          if (CallInst *CI = dyn_cast<CallInst>(II))
139            if (!CI->getCalledFunction() ||
140                !CI->getCalledFunction()->getIntrinsicID())
141              Calls.push_back(CI);
142
143      if (Calls.empty())
144        return nullptr;
145
146      // Create a cleanup block.
147      LLVMContext &C = F.getContext();
148      BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
149      Type *ExnTy =
150          StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
151      if (!F.hasPersonalityFn()) {
152        Constant *PersFn = F.getParent()->getOrInsertFunction(
153            "__gcc_personality_v0",
154            FunctionType::get(Type::getInt32Ty(C), true));
155        F.setPersonalityFn(PersFn);
156      }
157      LandingPadInst *LPad =
158          LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
159      LPad->setCleanup(true);
160      ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
161
162      // Transform the 'call' instructions into 'invoke's branching to the
163      // cleanup block. Go in reverse order to make prettier BB names.
164      SmallVector<Value *, 16> Args;
165      for (unsigned I = Calls.size(); I != 0;) {
166        CallInst *CI = cast<CallInst>(Calls[--I]);
167
168        // Split the basic block containing the function call.
169        BasicBlock *CallBB = CI->getParent();
170        BasicBlock *NewBB = CallBB->splitBasicBlock(
171            CI->getIterator(), CallBB->getName() + ".cont");
172
173        // Remove the unconditional branch inserted at the end of CallBB.
174        CallBB->getInstList().pop_back();
175        NewBB->getInstList().remove(CI);
176
177        // Create a new invoke instruction.
178        Args.clear();
179        CallSite CS(CI);
180        Args.append(CS.arg_begin(), CS.arg_end());
181
182        InvokeInst *II =
183            InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args,
184                               CI->getName(), CallBB);
185        II->setCallingConv(CI->getCallingConv());
186        II->setAttributes(CI->getAttributes());
187        CI->replaceAllUsesWith(II);
188        delete CI;
189      }
190
191      Builder.SetInsertPoint(RI);
192      return &Builder;
193    }
194  }
195};
196}
197
198
199Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
200  // doInitialization creates the abstract type of this value.
201  Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
202
203  // Truncate the ShadowStackDescriptor if some metadata is null.
204  unsigned NumMeta = 0;
205  SmallVector<Constant *, 16> Metadata;
206  for (unsigned I = 0; I != Roots.size(); ++I) {
207    Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
208    if (!C->isNullValue())
209      NumMeta = I + 1;
210    Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
211  }
212  Metadata.resize(NumMeta);
213
214  Type *Int32Ty = Type::getInt32Ty(F.getContext());
215
216  Constant *BaseElts[] = {
217      ConstantInt::get(Int32Ty, Roots.size(), false),
218      ConstantInt::get(Int32Ty, NumMeta, false),
219  };
220
221  Constant *DescriptorElts[] = {
222      ConstantStruct::get(FrameMapTy, BaseElts),
223      ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)};
224
225  Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
226  StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta));
227
228  Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
229
230  // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
231  //        that, short of multithreaded LLVM, it should be safe; all that is
232  //        necessary is that a simple Module::iterator loop not be invalidated.
233  //        Appending to the GlobalVariable list is safe in that sense.
234  //
235  //        All of the output passes emit globals last. The ExecutionEngine
236  //        explicitly supports adding globals to the module after
237  //        initialization.
238  //
239  //        Still, if it isn't deemed acceptable, then this transformation needs
240  //        to be a ModulePass (which means it cannot be in the 'llc' pipeline
241  //        (which uses a FunctionPassManager (which segfaults (not asserts) if
242  //        provided a ModulePass))).
243  Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
244                                    GlobalVariable::InternalLinkage, FrameMap,
245                                    "__gc_" + F.getName());
246
247  Constant *GEPIndices[2] = {
248      ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
249      ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
250  return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices);
251}
252
253Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
254  // doInitialization creates the generic version of this type.
255  std::vector<Type *> EltTys;
256  EltTys.push_back(StackEntryTy);
257  for (size_t I = 0; I != Roots.size(); I++)
258    EltTys.push_back(Roots[I].second->getAllocatedType());
259
260  return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
261}
262
263/// doInitialization - If this module uses the GC intrinsics, find them now. If
264/// not, exit fast.
265bool ShadowStackGCLowering::doInitialization(Module &M) {
266  bool Active = false;
267  for (Function &F : M) {
268    if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
269      Active = true;
270      break;
271    }
272  }
273  if (!Active)
274    return false;
275
276  // struct FrameMap {
277  //   int32_t NumRoots; // Number of roots in stack frame.
278  //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
279  //   void *Meta[];     // May be absent for roots without metadata.
280  // };
281  std::vector<Type *> EltTys;
282  // 32 bits is ok up to a 32GB stack frame. :)
283  EltTys.push_back(Type::getInt32Ty(M.getContext()));
284  // Specifies length of variable length array.
285  EltTys.push_back(Type::getInt32Ty(M.getContext()));
286  FrameMapTy = StructType::create(EltTys, "gc_map");
287  PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
288
289  // struct StackEntry {
290  //   ShadowStackEntry *Next; // Caller's stack entry.
291  //   FrameMap *Map;          // Pointer to constant FrameMap.
292  //   void *Roots[];          // Stack roots (in-place array, so we pretend).
293  // };
294
295  StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
296
297  EltTys.clear();
298  EltTys.push_back(PointerType::getUnqual(StackEntryTy));
299  EltTys.push_back(FrameMapPtrTy);
300  StackEntryTy->setBody(EltTys);
301  PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
302
303  // Get the root chain if it already exists.
304  Head = M.getGlobalVariable("llvm_gc_root_chain");
305  if (!Head) {
306    // If the root chain does not exist, insert a new one with linkonce
307    // linkage!
308    Head = new GlobalVariable(
309        M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
310        Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain");
311  } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
312    Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
313    Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
314  }
315
316  return true;
317}
318
319bool ShadowStackGCLowering::IsNullValue(Value *V) {
320  if (Constant *C = dyn_cast<Constant>(V))
321    return C->isNullValue();
322  return false;
323}
324
325void ShadowStackGCLowering::CollectRoots(Function &F) {
326  // FIXME: Account for original alignment. Could fragment the root array.
327  //   Approach 1: Null initialize empty slots at runtime. Yuck.
328  //   Approach 2: Emit a map of the array instead of just a count.
329
330  assert(Roots.empty() && "Not cleaned up?");
331
332  SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
333
334  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
335    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
336      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
337        if (Function *F = CI->getCalledFunction())
338          if (F->getIntrinsicID() == Intrinsic::gcroot) {
339            std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
340                CI,
341                cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
342            if (IsNullValue(CI->getArgOperand(1)))
343              Roots.push_back(Pair);
344            else
345              MetaRoots.push_back(Pair);
346          }
347
348  // Number roots with metadata (usually empty) at the beginning, so that the
349  // FrameMap::Meta array can be elided.
350  Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
351}
352
353GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
354                                                    IRBuilder<> &B, Type *Ty,
355                                                    Value *BasePtr, int Idx,
356                                                    int Idx2,
357                                                    const char *Name) {
358  Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
359                      ConstantInt::get(Type::getInt32Ty(Context), Idx),
360                      ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
361  Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
362
363  assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
364
365  return dyn_cast<GetElementPtrInst>(Val);
366}
367
368GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
369                                            IRBuilder<> &B, Type *Ty, Value *BasePtr,
370                                            int Idx, const char *Name) {
371  Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
372                      ConstantInt::get(Type::getInt32Ty(Context), Idx)};
373  Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
374
375  assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
376
377  return dyn_cast<GetElementPtrInst>(Val);
378}
379
380/// runOnFunction - Insert code to maintain the shadow stack.
381bool ShadowStackGCLowering::runOnFunction(Function &F) {
382  // Quick exit for functions that do not use the shadow stack GC.
383  if (!F.hasGC() ||
384      F.getGC() != std::string("shadow-stack"))
385    return false;
386
387  LLVMContext &Context = F.getContext();
388
389  // Find calls to llvm.gcroot.
390  CollectRoots(F);
391
392  // If there are no roots in this function, then there is no need to add a
393  // stack map entry for it.
394  if (Roots.empty())
395    return false;
396
397  // Build the constant map and figure the type of the shadow stack entry.
398  Value *FrameMap = GetFrameMap(F);
399  Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
400
401  // Build the shadow stack entry at the very start of the function.
402  BasicBlock::iterator IP = F.getEntryBlock().begin();
403  IRBuilder<> AtEntry(IP->getParent(), IP);
404
405  Instruction *StackEntry =
406      AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
407
408  while (isa<AllocaInst>(IP))
409    ++IP;
410  AtEntry.SetInsertPoint(IP->getParent(), IP);
411
412  // Initialize the map pointer and load the current head of the shadow stack.
413  Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
414  Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
415                                       StackEntry, 0, 1, "gc_frame.map");
416  AtEntry.CreateStore(FrameMap, EntryMapPtr);
417
418  // After all the allocas...
419  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
420    // For each root, find the corresponding slot in the aggregate...
421    Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
422                               StackEntry, 1 + I, "gc_root");
423
424    // And use it in lieu of the alloca.
425    AllocaInst *OriginalAlloca = Roots[I].second;
426    SlotPtr->takeName(OriginalAlloca);
427    OriginalAlloca->replaceAllUsesWith(SlotPtr);
428  }
429
430  // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
431  // really necessary (the collector would never see the intermediate state at
432  // runtime), but it's nicer not to push the half-initialized entry onto the
433  // shadow stack.
434  while (isa<StoreInst>(IP))
435    ++IP;
436  AtEntry.SetInsertPoint(IP->getParent(), IP);
437
438  // Push the entry onto the shadow stack.
439  Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
440                                        StackEntry, 0, 0, "gc_frame.next");
441  Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
442                                      StackEntry, 0, "gc_newhead");
443  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
444  AtEntry.CreateStore(NewHeadVal, Head);
445
446  // For each instruction that escapes...
447  EscapeEnumerator EE(F, "gc_cleanup");
448  while (IRBuilder<> *AtExit = EE.Next()) {
449    // Pop the entry from the shadow stack. Don't reuse CurrentHead from
450    // AtEntry, since that would make the value live for the entire function.
451    Instruction *EntryNextPtr2 =
452        CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
453                  "gc_frame.next");
454    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
455    AtExit->CreateStore(SavedHead, Head);
456  }
457
458  // Delete the original allocas (which are no longer used) and the intrinsic
459  // calls (which are no longer valid). Doing this last avoids invalidating
460  // iterators.
461  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
462    Roots[I].first->eraseFromParent();
463    Roots[I].second->eraseFromParent();
464  }
465
466  Roots.clear();
467  return true;
468}
469