1//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//! \file
11//! \brief This pass performs merges of loads and stores on both sides of a
12//  diamond (hammock). It hoists the loads and sinks the stores.
13//
14// The algorithm iteratively hoists two loads to the same address out of a
15// diamond (hammock) and merges them into a single load in the header. Similar
16// it sinks and merges two stores to the tail block (footer). The algorithm
17// iterates over the instructions of one side of the diamond and attempts to
18// find a matching load/store on the other side. It hoists / sinks when it
19// thinks it safe to do so.  This optimization helps with eg. hiding load
20// latencies, triggering if-conversion, and reducing static code size.
21//
22//===----------------------------------------------------------------------===//
23//
24//
25// Example:
26// Diamond shaped code before merge:
27//
28//            header:
29//                     br %cond, label %if.then, label %if.else
30//                        +                    +
31//                       +                      +
32//                      +                        +
33//            if.then:                         if.else:
34//               %lt = load %addr_l               %le = load %addr_l
35//               <use %lt>                        <use %le>
36//               <...>                            <...>
37//               store %st, %addr_s               store %se, %addr_s
38//               br label %if.end                 br label %if.end
39//                     +                         +
40//                      +                       +
41//                       +                     +
42//            if.end ("footer"):
43//                     <...>
44//
45// Diamond shaped code after merge:
46//
47//            header:
48//                     %l = load %addr_l
49//                     br %cond, label %if.then, label %if.else
50//                        +                    +
51//                       +                      +
52//                      +                        +
53//            if.then:                         if.else:
54//               <use %l>                         <use %l>
55//               <...>                            <...>
56//               br label %if.end                 br label %if.end
57//                      +                        +
58//                       +                      +
59//                        +                    +
60//            if.end ("footer"):
61//                     %s.sink = phi [%st, if.then], [%se, if.else]
62//                     <...>
63//                     store %s.sink, %addr_s
64//                     <...>
65//
66//
67//===----------------------- TODO -----------------------------------------===//
68//
69// 1) Generalize to regions other than diamonds
70// 2) Be more aggressive merging memory operations
71// Note that both changes require register pressure control
72//
73//===----------------------------------------------------------------------===//
74
75#include "llvm/Transforms/Scalar.h"
76#include "llvm/ADT/SetVector.h"
77#include "llvm/ADT/SmallPtrSet.h"
78#include "llvm/ADT/Statistic.h"
79#include "llvm/Analysis/AliasAnalysis.h"
80#include "llvm/Analysis/CFG.h"
81#include "llvm/Analysis/GlobalsModRef.h"
82#include "llvm/Analysis/Loads.h"
83#include "llvm/Analysis/MemoryBuiltins.h"
84#include "llvm/Analysis/MemoryDependenceAnalysis.h"
85#include "llvm/Analysis/TargetLibraryInfo.h"
86#include "llvm/IR/Metadata.h"
87#include "llvm/IR/PatternMatch.h"
88#include "llvm/Support/Allocator.h"
89#include "llvm/Support/CommandLine.h"
90#include "llvm/Support/Debug.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Transforms/Utils/BasicBlockUtils.h"
93#include "llvm/Transforms/Utils/SSAUpdater.h"
94#include <vector>
95
96using namespace llvm;
97
98#define DEBUG_TYPE "mldst-motion"
99
100//===----------------------------------------------------------------------===//
101//                         MergedLoadStoreMotion Pass
102//===----------------------------------------------------------------------===//
103
104namespace {
105class MergedLoadStoreMotion : public FunctionPass {
106  AliasAnalysis *AA;
107  MemoryDependenceAnalysis *MD;
108
109public:
110  static char ID; // Pass identification, replacement for typeid
111  MergedLoadStoreMotion()
112      : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
113    initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
114  }
115
116  bool runOnFunction(Function &F) override;
117
118private:
119  // This transformation requires dominator postdominator info
120  void getAnalysisUsage(AnalysisUsage &AU) const override {
121    AU.setPreservesCFG();
122    AU.addRequired<TargetLibraryInfoWrapperPass>();
123    AU.addRequired<AAResultsWrapperPass>();
124    AU.addPreserved<GlobalsAAWrapperPass>();
125    AU.addPreserved<MemoryDependenceAnalysis>();
126  }
127
128  // Helper routines
129
130  ///
131  /// \brief Remove instruction from parent and update memory dependence
132  /// analysis.
133  ///
134  void removeInstruction(Instruction *Inst);
135  BasicBlock *getDiamondTail(BasicBlock *BB);
136  bool isDiamondHead(BasicBlock *BB);
137  // Routines for hoisting loads
138  bool isLoadHoistBarrierInRange(const Instruction& Start,
139                                 const Instruction& End,
140                                 LoadInst* LI);
141  LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
142  void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
143                        Instruction *ElseInst);
144  bool isSafeToHoist(Instruction *I) const;
145  bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
146  bool mergeLoads(BasicBlock *BB);
147  // Routines for sinking stores
148  StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
149  PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
150  bool isStoreSinkBarrierInRange(const Instruction &Start,
151                                 const Instruction &End, MemoryLocation Loc);
152  bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
153  bool mergeStores(BasicBlock *BB);
154  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
155  // where Size0 and Size1 are the #instructions on the two sides of
156  // the diamond. The constant chosen here is arbitrary. Compiler Time
157  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
158  const int MagicCompileTimeControl;
159};
160
161char MergedLoadStoreMotion::ID = 0;
162} // anonymous namespace
163
164///
165/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
166///
167FunctionPass *llvm::createMergedLoadStoreMotionPass() {
168  return new MergedLoadStoreMotion();
169}
170
171INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
172                      "MergedLoadStoreMotion", false, false)
173INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
174INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
175INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
176INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
177INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
178                    "MergedLoadStoreMotion", false, false)
179
180///
181/// \brief Remove instruction from parent and update memory dependence analysis.
182///
183void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
184  // Notify the memory dependence analysis.
185  if (MD) {
186    MD->removeInstruction(Inst);
187    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
188      MD->invalidateCachedPointerInfo(LI->getPointerOperand());
189    if (Inst->getType()->getScalarType()->isPointerTy()) {
190      MD->invalidateCachedPointerInfo(Inst);
191    }
192  }
193  Inst->eraseFromParent();
194}
195
196///
197/// \brief Return tail block of a diamond.
198///
199BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
200  assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
201  BranchInst *BI = (BranchInst *)(BB->getTerminator());
202  BasicBlock *Succ0 = BI->getSuccessor(0);
203  BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
204  return Tail;
205}
206
207///
208/// \brief True when BB is the head of a diamond (hammock)
209///
210bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
211  if (!BB)
212    return false;
213  if (!isa<BranchInst>(BB->getTerminator()))
214    return false;
215  if (BB->getTerminator()->getNumSuccessors() != 2)
216    return false;
217
218  BranchInst *BI = (BranchInst *)(BB->getTerminator());
219  BasicBlock *Succ0 = BI->getSuccessor(0);
220  BasicBlock *Succ1 = BI->getSuccessor(1);
221
222  if (!Succ0->getSinglePredecessor() ||
223      Succ0->getTerminator()->getNumSuccessors() != 1)
224    return false;
225  if (!Succ1->getSinglePredecessor() ||
226      Succ1->getTerminator()->getNumSuccessors() != 1)
227    return false;
228
229  BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
230  // Ignore triangles.
231  if (Succ1->getTerminator()->getSuccessor(0) != Tail)
232    return false;
233  return true;
234}
235
236///
237/// \brief True when instruction is a hoist barrier for a load
238///
239/// Whenever an instruction could possibly modify the value
240/// being loaded or protect against the load from happening
241/// it is considered a hoist barrier.
242///
243bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
244                                                      const Instruction& End,
245                                                      LoadInst* LI) {
246  MemoryLocation Loc = MemoryLocation::get(LI);
247  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
248}
249
250///
251/// \brief Decide if a load can be hoisted
252///
253/// When there is a load in \p BB to the same address as \p LI
254/// and it can be hoisted from \p BB, return that load.
255/// Otherwise return Null.
256///
257LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
258                                                   LoadInst *Load0) {
259
260  for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
261       ++BBI) {
262    Instruction *Inst = &*BBI;
263
264    // Only merge and hoist loads when their result in used only in BB
265    if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
266      continue;
267
268    LoadInst *Load1 = dyn_cast<LoadInst>(Inst);
269    BasicBlock *BB0 = Load0->getParent();
270
271    MemoryLocation Loc0 = MemoryLocation::get(Load0);
272    MemoryLocation Loc1 = MemoryLocation::get(Load1);
273    if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
274        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
275        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
276      return Load1;
277    }
278  }
279  return nullptr;
280}
281
282///
283/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
284/// \p BB
285///
286/// BB is the head of a diamond
287///
288void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
289                                             Instruction *HoistCand,
290                                             Instruction *ElseInst) {
291  DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
292        dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
293        dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
294  // Hoist the instruction.
295  assert(HoistCand->getParent() != BB);
296
297  // Intersect optional metadata.
298  HoistCand->intersectOptionalDataWith(ElseInst);
299  HoistCand->dropUnknownNonDebugMetadata();
300
301  // Prepend point for instruction insert
302  Instruction *HoistPt = BB->getTerminator();
303
304  // Merged instruction
305  Instruction *HoistedInst = HoistCand->clone();
306
307  // Hoist instruction.
308  HoistedInst->insertBefore(HoistPt);
309
310  HoistCand->replaceAllUsesWith(HoistedInst);
311  removeInstruction(HoistCand);
312  // Replace the else block instruction.
313  ElseInst->replaceAllUsesWith(HoistedInst);
314  removeInstruction(ElseInst);
315}
316
317///
318/// \brief Return true if no operand of \p I is defined in I's parent block
319///
320bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
321  BasicBlock *Parent = I->getParent();
322  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
323    Instruction *Instr = dyn_cast<Instruction>(I->getOperand(i));
324    if (Instr && Instr->getParent() == Parent)
325      return false;
326  }
327  return true;
328}
329
330///
331/// \brief Merge two equivalent loads and GEPs and hoist into diamond head
332///
333bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
334                                      LoadInst *L1) {
335  // Only one definition?
336  Instruction *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
337  Instruction *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
338  if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
339      A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
340      A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
341      isa<GetElementPtrInst>(A0)) {
342    DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
343          dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
344          dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
345    hoistInstruction(BB, A0, A1);
346    hoistInstruction(BB, L0, L1);
347    return true;
348  } else
349    return false;
350}
351
352///
353/// \brief Try to hoist two loads to same address into diamond header
354///
355/// Starting from a diamond head block, iterate over the instructions in one
356/// successor block and try to match a load in the second successor.
357///
358bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
359  bool MergedLoads = false;
360  assert(isDiamondHead(BB));
361  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
362  BasicBlock *Succ0 = BI->getSuccessor(0);
363  BasicBlock *Succ1 = BI->getSuccessor(1);
364  // #Instructions in Succ1 for Compile Time Control
365  int Size1 = Succ1->size();
366  int NLoads = 0;
367  for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
368       BBI != BBE;) {
369    Instruction *I = &*BBI;
370    ++BBI;
371
372    // Only move non-simple (atomic, volatile) loads.
373    LoadInst *L0 = dyn_cast<LoadInst>(I);
374    if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
375      continue;
376
377    ++NLoads;
378    if (NLoads * Size1 >= MagicCompileTimeControl)
379      break;
380    if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
381      bool Res = hoistLoad(BB, L0, L1);
382      MergedLoads |= Res;
383      // Don't attempt to hoist above loads that had not been hoisted.
384      if (!Res)
385        break;
386    }
387  }
388  return MergedLoads;
389}
390
391///
392/// \brief True when instruction is a sink barrier for a store
393/// located in Loc
394///
395/// Whenever an instruction could possibly read or modify the
396/// value being stored or protect against the store from
397/// happening it is considered a sink barrier.
398///
399bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
400                                                      const Instruction &End,
401                                                      MemoryLocation Loc) {
402  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
403}
404
405///
406/// \brief Check if \p BB contains a store to the same address as \p SI
407///
408/// \return The store in \p  when it is safe to sink. Otherwise return Null.
409///
410StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
411                                                   StoreInst *Store0) {
412  DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
413  BasicBlock *BB0 = Store0->getParent();
414  for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
415       RBI != RBE; ++RBI) {
416    Instruction *Inst = &*RBI;
417
418    if (!isa<StoreInst>(Inst))
419       continue;
420
421    StoreInst *Store1 = cast<StoreInst>(Inst);
422
423    MemoryLocation Loc0 = MemoryLocation::get(Store0);
424    MemoryLocation Loc1 = MemoryLocation::get(Store1);
425    if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
426      !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
427                                 BB1->back(), Loc1) &&
428      !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
429                                 BB0->back(), Loc0)) {
430      return Store1;
431    }
432  }
433  return nullptr;
434}
435
436///
437/// \brief Create a PHI node in BB for the operands of S0 and S1
438///
439PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
440                                              StoreInst *S1) {
441  // Create a phi if the values mismatch.
442  PHINode *NewPN = nullptr;
443  Value *Opd1 = S0->getValueOperand();
444  Value *Opd2 = S1->getValueOperand();
445  if (Opd1 != Opd2) {
446    NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
447                            &BB->front());
448    NewPN->addIncoming(Opd1, S0->getParent());
449    NewPN->addIncoming(Opd2, S1->getParent());
450    if (MD && NewPN->getType()->getScalarType()->isPointerTy())
451      MD->invalidateCachedPointerInfo(NewPN);
452  }
453  return NewPN;
454}
455
456///
457/// \brief Merge two stores to same address and sink into \p BB
458///
459/// Also sinks GEP instruction computing the store address
460///
461bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
462                                      StoreInst *S1) {
463  // Only one definition?
464  Instruction *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
465  Instruction *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
466  if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
467      (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
468      (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
469    DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
470          dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
471          dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
472    // Hoist the instruction.
473    BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
474    // Intersect optional metadata.
475    S0->intersectOptionalDataWith(S1);
476    S0->dropUnknownNonDebugMetadata();
477
478    // Create the new store to be inserted at the join point.
479    StoreInst *SNew = (StoreInst *)(S0->clone());
480    Instruction *ANew = A0->clone();
481    SNew->insertBefore(&*InsertPt);
482    ANew->insertBefore(SNew);
483
484    assert(S0->getParent() == A0->getParent());
485    assert(S1->getParent() == A1->getParent());
486
487    PHINode *NewPN = getPHIOperand(BB, S0, S1);
488    // New PHI operand? Use it.
489    if (NewPN)
490      SNew->setOperand(0, NewPN);
491    removeInstruction(S0);
492    removeInstruction(S1);
493    A0->replaceAllUsesWith(ANew);
494    removeInstruction(A0);
495    A1->replaceAllUsesWith(ANew);
496    removeInstruction(A1);
497    return true;
498  }
499  return false;
500}
501
502///
503/// \brief True when two stores are equivalent and can sink into the footer
504///
505/// Starting from a diamond tail block, iterate over the instructions in one
506/// predecessor block and try to match a store in the second predecessor.
507///
508bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
509
510  bool MergedStores = false;
511  assert(T && "Footer of a diamond cannot be empty");
512
513  pred_iterator PI = pred_begin(T), E = pred_end(T);
514  assert(PI != E);
515  BasicBlock *Pred0 = *PI;
516  ++PI;
517  BasicBlock *Pred1 = *PI;
518  ++PI;
519  // tail block  of a diamond/hammock?
520  if (Pred0 == Pred1)
521    return false; // No.
522  if (PI != E)
523    return false; // No. More than 2 predecessors.
524
525  // #Instructions in Succ1 for Compile Time Control
526  int Size1 = Pred1->size();
527  int NStores = 0;
528
529  for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
530       RBI != RBE;) {
531
532    Instruction *I = &*RBI;
533    ++RBI;
534
535    // Sink move non-simple (atomic, volatile) stores
536    if (!isa<StoreInst>(I))
537      continue;
538    StoreInst *S0 = (StoreInst *)I;
539    if (!S0->isSimple())
540      continue;
541
542    ++NStores;
543    if (NStores * Size1 >= MagicCompileTimeControl)
544      break;
545    if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
546      bool Res = sinkStore(T, S0, S1);
547      MergedStores |= Res;
548      // Don't attempt to sink below stores that had to stick around
549      // But after removal of a store and some of its feeding
550      // instruction search again from the beginning since the iterator
551      // is likely stale at this point.
552      if (!Res)
553        break;
554      else {
555        RBI = Pred0->rbegin();
556        RBE = Pred0->rend();
557        DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
558      }
559    }
560  }
561  return MergedStores;
562}
563
564///
565/// \brief Run the transformation for each function
566///
567bool MergedLoadStoreMotion::runOnFunction(Function &F) {
568  MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
569  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
570
571  bool Changed = false;
572  DEBUG(dbgs() << "Instruction Merger\n");
573
574  // Merge unconditional branches, allowing PRE to catch more
575  // optimization opportunities.
576  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
577    BasicBlock *BB = &*FI++;
578
579    // Hoist equivalent loads and sink stores
580    // outside diamonds when possible
581    if (isDiamondHead(BB)) {
582      Changed |= mergeLoads(BB);
583      Changed |= mergeStores(getDiamondTail(BB));
584    }
585  }
586  return Changed;
587}
588