1//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that collect the Linker Optimization Hint (LOH).
11// This pass should be run at the very end of the compilation flow, just before
12// assembly printer.
13// To be useful for the linker, the LOH must be printed into the assembly file.
14//
15// A LOH describes a sequence of instructions that may be optimized by the
16// linker.
17// This same sequence cannot be optimized by the compiler because some of
18// the information will be known at link time.
19// For instance, consider the following sequence:
20//     L1: adrp xA, sym@PAGE
21//     L2: add xB, xA, sym@PAGEOFF
22//     L3: ldr xC, [xB, #imm]
23// This sequence can be turned into:
24// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25//     L3: ldr xC, sym+#imm
26// It may also be turned into either the following more efficient
27// code sequences:
28// - If sym@PAGEOFF + #imm fits the encoding space of L3.
29//     L1: adrp xA, sym@PAGE
30//     L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
32//     L1: adr xA, sym
33//     L3: ldr xC, [xB, #imm]
34//
35// To be valid a LOH must meet all the requirements needed by all the related
36// possible linker transformations.
37// For instance, using the running example, the constraints to emit
38// ".loh AdrpAddLdr" are:
39// - L1, L2, and L3 instructions are of the expected type, i.e.,
40//   respectively ADRP, ADD (immediate), and LD.
41// - The result of L1 is used only by L2.
42// - The register argument (xA) used in the ADD instruction is defined
43//   only by L1.
44// - The result of L2 is used only by L3.
45// - The base address (xB) in L3 is defined only L2.
46// - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47//   @PAGE/@PAGEOFF with no additional constants
48//
49// Currently supported LOHs are:
50// * So called non-ADRP-related:
51//   - .loh AdrpAddLdr L1, L2, L3:
52//     L1: adrp xA, sym@PAGE
53//     L2: add xB, xA, sym@PAGEOFF
54//     L3: ldr xC, [xB, #imm]
55//   - .loh AdrpLdrGotLdr L1, L2, L3:
56//     L1: adrp xA, sym@GOTPAGE
57//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
58//     L3: ldr xC, [xB, #imm]
59//   - .loh AdrpLdr L1, L3:
60//     L1: adrp xA, sym@PAGE
61//     L3: ldr xC, [xA, sym@PAGEOFF]
62//   - .loh AdrpAddStr L1, L2, L3:
63//     L1: adrp xA, sym@PAGE
64//     L2: add xB, xA, sym@PAGEOFF
65//     L3: str xC, [xB, #imm]
66//   - .loh AdrpLdrGotStr L1, L2, L3:
67//     L1: adrp xA, sym@GOTPAGE
68//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
69//     L3: str xC, [xB, #imm]
70//   - .loh AdrpAdd L1, L2:
71//     L1: adrp xA, sym@PAGE
72//     L2: add xB, xA, sym@PAGEOFF
73//   For all these LOHs, L1, L2, L3 form a simple chain:
74//   L1 result is used only by L2 and L2 result by L3.
75//   L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
76//   by L1.
77// All these LOHs aim at using more efficient load/store patterns by folding
78// some instructions used to compute the address directly into the load/store.
79//
80// * So called ADRP-related:
81//  - .loh AdrpAdrp L2, L1:
82//    L2: ADRP xA, sym1@PAGE
83//    L1: ADRP xA, sym2@PAGE
84//    L2 dominates L1 and xA is not redifined between L2 and L1
85// This LOH aims at getting rid of redundant ADRP instructions.
86//
87// The overall design for emitting the LOHs is:
88// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90//     1. Associates them a label.
91//     2. Emits them in a MCStreamer (EmitLOHDirective).
92//         - The MCMachOStreamer records them into the MCAssembler.
93//         - The MCAsmStreamer prints them.
94//         - Other MCStreamers ignore them.
95//     3. Closes the MCStreamer:
96//         - The MachObjectWriter gets them from the MCAssembler and writes
97//           them in the object file.
98//         - Other ObjectWriters ignore them.
99//===----------------------------------------------------------------------===//
100
101#include "AArch64.h"
102#include "AArch64InstrInfo.h"
103#include "AArch64MachineFunctionInfo.h"
104#include "AArch64Subtarget.h"
105#include "MCTargetDesc/AArch64AddressingModes.h"
106#include "llvm/ADT/BitVector.h"
107#include "llvm/ADT/DenseMap.h"
108#include "llvm/ADT/MapVector.h"
109#include "llvm/ADT/SetVector.h"
110#include "llvm/ADT/SmallVector.h"
111#include "llvm/ADT/Statistic.h"
112#include "llvm/CodeGen/MachineBasicBlock.h"
113#include "llvm/CodeGen/MachineDominators.h"
114#include "llvm/CodeGen/MachineFunctionPass.h"
115#include "llvm/CodeGen/MachineInstr.h"
116#include "llvm/CodeGen/MachineInstrBuilder.h"
117#include "llvm/Support/CommandLine.h"
118#include "llvm/Support/Debug.h"
119#include "llvm/Support/ErrorHandling.h"
120#include "llvm/Support/raw_ostream.h"
121#include "llvm/Target/TargetInstrInfo.h"
122#include "llvm/Target/TargetMachine.h"
123#include "llvm/Target/TargetRegisterInfo.h"
124using namespace llvm;
125
126#define DEBUG_TYPE "aarch64-collect-loh"
127
128static cl::opt<bool>
129PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
130                   cl::desc("Restrict analysis to registers invovled"
131                            " in LOHs"),
132                   cl::init(true));
133
134static cl::opt<bool>
135BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
136                    cl::desc("Restrict analysis at basic block scope"),
137                    cl::init(true));
138
139STATISTIC(NumADRPSimpleCandidate,
140          "Number of simplifiable ADRP dominate by another");
141STATISTIC(NumADRPComplexCandidate2,
142          "Number of simplifiable ADRP reachable by 2 defs");
143STATISTIC(NumADRPComplexCandidate3,
144          "Number of simplifiable ADRP reachable by 3 defs");
145STATISTIC(NumADRPComplexCandidateOther,
146          "Number of simplifiable ADRP reachable by 4 or more defs");
147STATISTIC(NumADDToSTRWithImm,
148          "Number of simplifiable STR with imm reachable by ADD");
149STATISTIC(NumLDRToSTRWithImm,
150          "Number of simplifiable STR with imm reachable by LDR");
151STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
152STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
153STATISTIC(NumADDToLDRWithImm,
154          "Number of simplifiable LDR with imm reachable by ADD");
155STATISTIC(NumLDRToLDRWithImm,
156          "Number of simplifiable LDR with imm reachable by LDR");
157STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
158STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
159STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
160STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
161STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
162STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
163STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
164STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
165STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
166
167namespace llvm {
168void initializeAArch64CollectLOHPass(PassRegistry &);
169}
170
171#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
172
173namespace {
174struct AArch64CollectLOH : public MachineFunctionPass {
175  static char ID;
176  AArch64CollectLOH() : MachineFunctionPass(ID) {
177    initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
178  }
179
180  bool runOnMachineFunction(MachineFunction &MF) override;
181
182  const char *getPassName() const override {
183    return AARCH64_COLLECT_LOH_NAME;
184  }
185
186  void getAnalysisUsage(AnalysisUsage &AU) const override {
187    AU.setPreservesAll();
188    MachineFunctionPass::getAnalysisUsage(AU);
189    AU.addRequired<MachineDominatorTree>();
190  }
191
192private:
193};
194
195/// A set of MachineInstruction.
196typedef SetVector<const MachineInstr *> SetOfMachineInstr;
197/// Map a basic block to a set of instructions per register.
198/// This is used to represent the exposed uses of a basic block
199/// per register.
200typedef MapVector<const MachineBasicBlock *,
201                  std::unique_ptr<SetOfMachineInstr[]>>
202BlockToSetOfInstrsPerColor;
203/// Map a basic block to an instruction per register.
204/// This is used to represent the live-out definitions of a basic block
205/// per register.
206typedef MapVector<const MachineBasicBlock *,
207                  std::unique_ptr<const MachineInstr *[]>>
208BlockToInstrPerColor;
209/// Map an instruction to a set of instructions. Used to represent the
210/// mapping def to reachable uses or use to definitions.
211typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
212/// Map a basic block to a BitVector.
213/// This is used to record the kill registers per basic block.
214typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
215
216/// Map a register to a dense id.
217typedef DenseMap<unsigned, unsigned> MapRegToId;
218/// Map a dense id to a register. Used for debug purposes.
219typedef SmallVector<unsigned, 32> MapIdToReg;
220} // end anonymous namespace.
221
222char AArch64CollectLOH::ID = 0;
223
224INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
225                      AARCH64_COLLECT_LOH_NAME, false, false)
226INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
227INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
228                    AARCH64_COLLECT_LOH_NAME, false, false)
229
230/// Given a couple (MBB, reg) get the corresponding set of instruction from
231/// the given "sets".
232/// If this couple does not reference any set, an empty set is added to "sets"
233/// for this couple and returned.
234/// \param nbRegs is used internally allocate some memory. It must be consistent
235/// with the way sets is used.
236static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
237                                 const MachineBasicBlock &MBB, unsigned reg,
238                                 unsigned nbRegs) {
239  SetOfMachineInstr *result;
240  BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
241  if (it != sets.end())
242    result = it->second.get();
243  else
244    result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
245
246  return result[reg];
247}
248
249/// Given a couple (reg, MI) get the corresponding set of instructions from the
250/// the given "sets".
251/// This is used to get the uses record in sets of a definition identified by
252/// MI and reg, i.e., MI defines reg.
253/// If the couple does not reference anything, an empty set is added to
254/// "sets[reg]".
255/// \pre set[reg] is valid.
256static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
257                                  const MachineInstr &MI) {
258  return sets[reg][&MI];
259}
260
261/// Same as getUses but does not modify the input map: sets.
262/// \return NULL if the couple (reg, MI) is not in sets.
263static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
264                                        const MachineInstr &MI) {
265  InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
266  if (Res != sets[reg].end())
267    return &(Res->second);
268  return nullptr;
269}
270
271/// Initialize the reaching definition algorithm:
272/// For each basic block BB in MF, record:
273/// - its kill set.
274/// - its reachable uses (uses that are exposed to BB's predecessors).
275/// - its the generated definitions.
276/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
277/// the list of uses of exposed defintions.
278/// \param ADRPMode specifies to only consider ADRP instructions for generated
279/// definition. It also consider definitions of ADRP instructions as uses and
280/// ignore other uses. The ADRPMode is used to collect the information for LHO
281/// that involve ADRP operation only.
282static void initReachingDef(const MachineFunction &MF,
283                            InstrToInstrs *ColorOpToReachedUses,
284                            BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
285                            BlockToSetOfInstrsPerColor &ReachableUses,
286                            const MapRegToId &RegToId,
287                            const MachineInstr *DummyOp, bool ADRPMode) {
288  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
289  unsigned NbReg = RegToId.size();
290
291  for (const MachineBasicBlock &MBB : MF) {
292    auto &BBGen = Gen[&MBB];
293    BBGen = make_unique<const MachineInstr *[]>(NbReg);
294    std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
295
296    BitVector &BBKillSet = Kill[&MBB];
297    BBKillSet.resize(NbReg);
298    for (const MachineInstr &MI : MBB) {
299      bool IsADRP = MI.getOpcode() == AArch64::ADRP;
300
301      // Process uses first.
302      if (IsADRP || !ADRPMode)
303        for (const MachineOperand &MO : MI.operands()) {
304          // Treat ADRP def as use, as the goal of the analysis is to find
305          // ADRP defs reached by other ADRP defs.
306          if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
307              (ADRPMode && (!IsADRP || !MO.isDef())))
308            continue;
309          unsigned CurReg = MO.getReg();
310          MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
311          if (ItCurRegId == RegToId.end())
312            continue;
313          CurReg = ItCurRegId->second;
314
315          // if CurReg has not been defined, this use is reachable.
316          if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
317            getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
318          // current basic block definition for this color, if any, is in Gen.
319          if (BBGen[CurReg])
320            getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
321        }
322
323      // Process clobbers.
324      for (const MachineOperand &MO : MI.operands()) {
325        if (!MO.isRegMask())
326          continue;
327        // Clobbers kill the related colors.
328        const uint32_t *PreservedRegs = MO.getRegMask();
329
330        // Set generated regs.
331        for (const auto &Entry : RegToId) {
332          unsigned Reg = Entry.second;
333          // Use the global register ID when querying APIs external to this
334          // pass.
335          if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
336            // Do not register clobbered definition for no ADRP.
337            // This definition is not used anyway (otherwise register
338            // allocation is wrong).
339            BBGen[Reg] = ADRPMode ? &MI : nullptr;
340            BBKillSet.set(Reg);
341          }
342        }
343      }
344
345      // Process register defs.
346      for (const MachineOperand &MO : MI.operands()) {
347        if (!MO.isReg() || !MO.isDef())
348          continue;
349        unsigned CurReg = MO.getReg();
350        MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
351        if (ItCurRegId == RegToId.end())
352          continue;
353
354        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
355          MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
356          // If this alias has not been recorded, then it is not interesting
357          // for the current analysis.
358          // We can end up in this situation because of tuple registers.
359          // E.g., Let say we are interested in S1. When we register
360          // S1, we will also register its aliases and in particular
361          // the tuple Q1_Q2.
362          // Now, when we encounter Q1_Q2, we will look through its aliases
363          // and will find that S2 is not registered.
364          if (ItRegId == RegToId.end())
365            continue;
366
367          BBKillSet.set(ItRegId->second);
368          BBGen[ItRegId->second] = &MI;
369        }
370        BBGen[ItCurRegId->second] = &MI;
371      }
372    }
373
374    // If we restrict our analysis to basic block scope, conservatively add a
375    // dummy
376    // use for each generated value.
377    if (!ADRPMode && DummyOp && !MBB.succ_empty())
378      for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
379        if (BBGen[CurReg])
380          getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
381  }
382}
383
384/// Reaching def core algorithm:
385/// while an Out has changed
386///    for each bb
387///       for each color
388///           In[bb][color] = U Out[bb.predecessors][color]
389///           insert reachableUses[bb][color] in each in[bb][color]
390///                 op.reachedUses
391///
392///           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
393static void reachingDefAlgorithm(const MachineFunction &MF,
394                                 InstrToInstrs *ColorOpToReachedUses,
395                                 BlockToSetOfInstrsPerColor &In,
396                                 BlockToSetOfInstrsPerColor &Out,
397                                 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
398                                 BlockToSetOfInstrsPerColor &ReachableUses,
399                                 unsigned NbReg) {
400  bool HasChanged;
401  do {
402    HasChanged = false;
403    for (const MachineBasicBlock &MBB : MF) {
404      unsigned CurReg;
405      for (CurReg = 0; CurReg < NbReg; ++CurReg) {
406        SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
407        SetOfMachineInstr &BBReachableUses =
408            getSet(ReachableUses, MBB, CurReg, NbReg);
409        SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
410        unsigned Size = BBOutSet.size();
411        //   In[bb][color] = U Out[bb.predecessors][color]
412        for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
413          SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
414          BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
415        }
416        //   insert reachableUses[bb][color] in each in[bb][color] op.reachedses
417        for (const MachineInstr *MI : BBInSet) {
418          SetOfMachineInstr &OpReachedUses =
419              getUses(ColorOpToReachedUses, CurReg, *MI);
420          OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
421        }
422        //           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
423        if (!Kill[&MBB].test(CurReg))
424          BBOutSet.insert(BBInSet.begin(), BBInSet.end());
425        if (Gen[&MBB][CurReg])
426          BBOutSet.insert(Gen[&MBB][CurReg]);
427        HasChanged |= BBOutSet.size() != Size;
428      }
429    }
430  } while (HasChanged);
431}
432
433/// Reaching definition algorithm.
434/// \param MF function on which the algorithm will operate.
435/// \param[out] ColorOpToReachedUses will contain the result of the reaching
436/// def algorithm.
437/// \param ADRPMode specify whether the reaching def algorithm should be tuned
438/// for ADRP optimization. \see initReachingDef for more details.
439/// \param DummyOp if not NULL, the algorithm will work at
440/// basic block scope and will set for every exposed definition a use to
441/// @p DummyOp.
442/// \pre ColorOpToReachedUses is an array of at least number of registers of
443/// InstrToInstrs.
444static void reachingDef(const MachineFunction &MF,
445                        InstrToInstrs *ColorOpToReachedUses,
446                        const MapRegToId &RegToId, bool ADRPMode = false,
447                        const MachineInstr *DummyOp = nullptr) {
448  // structures:
449  // For each basic block.
450  // Out: a set per color of definitions that reach the
451  //      out boundary of this block.
452  // In: Same as Out but for in boundary.
453  // Gen: generated color in this block (one operation per color).
454  // Kill: register set of killed color in this block.
455  // ReachableUses: a set per color of uses (operation) reachable
456  //                for "In" definitions.
457  BlockToSetOfInstrsPerColor Out, In, ReachableUses;
458  BlockToInstrPerColor Gen;
459  BlockToRegSet Kill;
460
461  // Initialize Gen, kill and reachableUses.
462  initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
463                  DummyOp, ADRPMode);
464
465  // Algo.
466  if (!DummyOp)
467    reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
468                         ReachableUses, RegToId.size());
469}
470
471#ifndef NDEBUG
472/// print the result of the reaching definition algorithm.
473static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
474                             unsigned NbReg, const TargetRegisterInfo *TRI,
475                             const MapIdToReg &IdToReg) {
476  unsigned CurReg;
477  for (CurReg = 0; CurReg < NbReg; ++CurReg) {
478    if (ColorOpToReachedUses[CurReg].empty())
479      continue;
480    DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
481
482    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
483      DEBUG(dbgs() << "Def:\n");
484      DEBUG(DefsIt.first->print(dbgs()));
485      DEBUG(dbgs() << "Reachable uses:\n");
486      for (const MachineInstr *MI : DefsIt.second) {
487        DEBUG(MI->print(dbgs()));
488      }
489    }
490  }
491}
492#endif // NDEBUG
493
494/// Answer the following question: Can Def be one of the definition
495/// involved in a part of a LOH?
496static bool canDefBePartOfLOH(const MachineInstr *Def) {
497  unsigned Opc = Def->getOpcode();
498  // Accept ADRP, ADDLow and LOADGot.
499  switch (Opc) {
500  default:
501    return false;
502  case AArch64::ADRP:
503    return true;
504  case AArch64::ADDXri:
505    // Check immediate to see if the immediate is an address.
506    switch (Def->getOperand(2).getType()) {
507    default:
508      return false;
509    case MachineOperand::MO_GlobalAddress:
510    case MachineOperand::MO_JumpTableIndex:
511    case MachineOperand::MO_ConstantPoolIndex:
512    case MachineOperand::MO_BlockAddress:
513      return true;
514    }
515  case AArch64::LDRXui:
516    // Check immediate to see if the immediate is an address.
517    switch (Def->getOperand(2).getType()) {
518    default:
519      return false;
520    case MachineOperand::MO_GlobalAddress:
521      return true;
522    }
523  }
524  // Unreachable.
525  return false;
526}
527
528/// Check whether the given instruction can the end of a LOH chain involving a
529/// store.
530static bool isCandidateStore(const MachineInstr *Instr) {
531  switch (Instr->getOpcode()) {
532  default:
533    return false;
534  case AArch64::STRBBui:
535  case AArch64::STRHHui:
536  case AArch64::STRBui:
537  case AArch64::STRHui:
538  case AArch64::STRWui:
539  case AArch64::STRXui:
540  case AArch64::STRSui:
541  case AArch64::STRDui:
542  case AArch64::STRQui:
543    // In case we have str xA, [xA, #imm], this is two different uses
544    // of xA and we cannot fold, otherwise the xA stored may be wrong,
545    // even if #imm == 0.
546    if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
547      return true;
548  }
549  return false;
550}
551
552/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
553/// Build the Use to Defs information and filter out obvious non-LOH candidates.
554/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
555/// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
556/// i.e., no simple chain.
557/// \param ADRPMode -- \see initReachingDef.
558static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
559                              const InstrToInstrs *ColorOpToReachedUses,
560                              const MapRegToId &RegToId,
561                              bool ADRPMode = false) {
562
563  SetOfMachineInstr NotCandidate;
564  unsigned NbReg = RegToId.size();
565  MapRegToId::const_iterator EndIt = RegToId.end();
566  for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
567    // If this color is never defined, continue.
568    if (ColorOpToReachedUses[CurReg].empty())
569      continue;
570
571    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
572      for (const MachineInstr *MI : DefsIt.second) {
573        const MachineInstr *Def = DefsIt.first;
574        MapRegToId::const_iterator It;
575        // if all the reaching defs are not adrp, this use will not be
576        // simplifiable.
577        if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
578            (!ADRPMode && !canDefBePartOfLOH(Def)) ||
579            (!ADRPMode && isCandidateStore(MI) &&
580             // store are LOH candidate iff the end of the chain is used as
581             // base.
582             ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
583              It->second != CurReg))) {
584          NotCandidate.insert(MI);
585          continue;
586        }
587        // Do not consider self reaching as a simplifiable case for ADRP.
588        if (!ADRPMode || MI != DefsIt.first) {
589          UseToReachingDefs[MI].insert(DefsIt.first);
590          // If UsesIt has several reaching definitions, it is not
591          // candidate for simplificaton in non-ADRPMode.
592          if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
593            NotCandidate.insert(MI);
594        }
595      }
596    }
597  }
598  for (const MachineInstr *Elem : NotCandidate) {
599    DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
600    // It would have been better if we could just remove the entry
601    // from the map.  Because of that, we have to filter the garbage
602    // (second.empty) in the subsequence analysis.
603    UseToReachingDefs[Elem].clear();
604  }
605}
606
607/// Based on the use to defs information (in ADRPMode), compute the
608/// opportunities of LOH ADRP-related.
609static void computeADRP(const InstrToInstrs &UseToDefs,
610                        AArch64FunctionInfo &AArch64FI,
611                        const MachineDominatorTree *MDT) {
612  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
613  for (const auto &Entry : UseToDefs) {
614    unsigned Size = Entry.second.size();
615    if (Size == 0)
616      continue;
617    if (Size == 1) {
618      const MachineInstr *L2 = *Entry.second.begin();
619      const MachineInstr *L1 = Entry.first;
620      if (!MDT->dominates(L2, L1)) {
621        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
622                     << '\n');
623        continue;
624      }
625      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
626      SmallVector<const MachineInstr *, 2> Args;
627      Args.push_back(L2);
628      Args.push_back(L1);
629      AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
630      ++NumADRPSimpleCandidate;
631    }
632#ifdef DEBUG
633    else if (Size == 2)
634      ++NumADRPComplexCandidate2;
635    else if (Size == 3)
636      ++NumADRPComplexCandidate3;
637    else
638      ++NumADRPComplexCandidateOther;
639#endif
640    // if Size < 1, the use should have been removed from the candidates
641    assert(Size >= 1 && "No reaching defs for that use!");
642  }
643}
644
645/// Check whether the given instruction can be the end of a LOH chain
646/// involving a load.
647static bool isCandidateLoad(const MachineInstr *Instr) {
648  switch (Instr->getOpcode()) {
649  default:
650    return false;
651  case AArch64::LDRSBWui:
652  case AArch64::LDRSBXui:
653  case AArch64::LDRSHWui:
654  case AArch64::LDRSHXui:
655  case AArch64::LDRSWui:
656  case AArch64::LDRBui:
657  case AArch64::LDRHui:
658  case AArch64::LDRWui:
659  case AArch64::LDRXui:
660  case AArch64::LDRSui:
661  case AArch64::LDRDui:
662  case AArch64::LDRQui:
663    if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
664      return false;
665    return true;
666  }
667  // Unreachable.
668  return false;
669}
670
671/// Check whether the given instruction can load a litteral.
672static bool supportLoadFromLiteral(const MachineInstr *Instr) {
673  switch (Instr->getOpcode()) {
674  default:
675    return false;
676  case AArch64::LDRSWui:
677  case AArch64::LDRWui:
678  case AArch64::LDRXui:
679  case AArch64::LDRSui:
680  case AArch64::LDRDui:
681  case AArch64::LDRQui:
682    return true;
683  }
684  // Unreachable.
685  return false;
686}
687
688/// Check whether the given instruction is a LOH candidate.
689/// \param UseToDefs is used to check that Instr is at the end of LOH supported
690/// chain.
691/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
692/// already been filtered out.
693static bool isCandidate(const MachineInstr *Instr,
694                        const InstrToInstrs &UseToDefs,
695                        const MachineDominatorTree *MDT) {
696  if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
697    return false;
698
699  const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
700  if (Def->getOpcode() != AArch64::ADRP) {
701    // At this point, Def is ADDXri or LDRXui of the right type of
702    // symbol, because we filtered out the uses that were not defined
703    // by these kind of instructions (+ ADRP).
704
705    // Check if this forms a simple chain: each intermediate node must
706    // dominates the next one.
707    if (!MDT->dominates(Def, Instr))
708      return false;
709    // Move one node up in the simple chain.
710    if (UseToDefs.find(Def) ==
711            UseToDefs.end()
712            // The map may contain garbage we have to ignore.
713        ||
714        UseToDefs.find(Def)->second.empty())
715      return false;
716    Instr = Def;
717    Def = *UseToDefs.find(Def)->second.begin();
718  }
719  // Check if we reached the top of the simple chain:
720  // - top is ADRP.
721  // - check the simple chain property: each intermediate node must
722  // dominates the next one.
723  if (Def->getOpcode() == AArch64::ADRP)
724    return MDT->dominates(Def, Instr);
725  return false;
726}
727
728static bool registerADRCandidate(const MachineInstr &Use,
729                                 const InstrToInstrs &UseToDefs,
730                                 const InstrToInstrs *DefsPerColorToUses,
731                                 AArch64FunctionInfo &AArch64FI,
732                                 SetOfMachineInstr *InvolvedInLOHs,
733                                 const MapRegToId &RegToId) {
734  // Look for opportunities to turn ADRP -> ADD or
735  // ADRP -> LDR GOTPAGEOFF into ADR.
736  // If ADRP has more than one use. Give up.
737  if (Use.getOpcode() != AArch64::ADDXri &&
738      (Use.getOpcode() != AArch64::LDRXui ||
739       !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
740    return false;
741  InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
742  // The map may contain garbage that we need to ignore.
743  if (It == UseToDefs.end() || It->second.empty())
744    return false;
745  const MachineInstr &Def = **It->second.begin();
746  if (Def.getOpcode() != AArch64::ADRP)
747    return false;
748  // Check the number of users of ADRP.
749  const SetOfMachineInstr *Users =
750      getUses(DefsPerColorToUses,
751              RegToId.find(Def.getOperand(0).getReg())->second, Def);
752  if (Users->size() > 1) {
753    ++NumADRComplexCandidate;
754    return false;
755  }
756  ++NumADRSimpleCandidate;
757  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
758         "ADRP already involved in LOH.");
759  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
760         "ADD already involved in LOH.");
761  DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
762
763  SmallVector<const MachineInstr *, 2> Args;
764  Args.push_back(&Def);
765  Args.push_back(&Use);
766
767  AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd
768                                                           : MCLOH_AdrpLdrGot,
769                          Args);
770  return true;
771}
772
773/// Based on the use to defs information (in non-ADRPMode), compute the
774/// opportunities of LOH non-ADRP-related
775static void computeOthers(const InstrToInstrs &UseToDefs,
776                          const InstrToInstrs *DefsPerColorToUses,
777                          AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
778                          const MachineDominatorTree *MDT) {
779  SetOfMachineInstr *InvolvedInLOHs = nullptr;
780#ifdef DEBUG
781  SetOfMachineInstr InvolvedInLOHsStorage;
782  InvolvedInLOHs = &InvolvedInLOHsStorage;
783#endif // DEBUG
784  DEBUG(dbgs() << "*** Compute LOH for Others\n");
785  // ADRP -> ADD/LDR -> LDR/STR pattern.
786  // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
787
788  // FIXME: When the statistics are not important,
789  // This initial filtering loop can be merged into the next loop.
790  // Currently, we didn't do it to have the same code for both DEBUG and
791  // NDEBUG builds. Indeed, the iterator of the second loop would need
792  // to be changed.
793  SetOfMachineInstr PotentialCandidates;
794  SetOfMachineInstr PotentialADROpportunities;
795  for (auto &Use : UseToDefs) {
796    // If no definition is available, this is a non candidate.
797    if (Use.second.empty())
798      continue;
799    // Keep only instructions that are load or store and at the end of
800    // a ADRP -> ADD/LDR/Nothing chain.
801    // We already filtered out the no-chain cases.
802    if (!isCandidate(Use.first, UseToDefs, MDT)) {
803      PotentialADROpportunities.insert(Use.first);
804      continue;
805    }
806    PotentialCandidates.insert(Use.first);
807  }
808
809  // Make the following distinctions for statistics as the linker does
810  // know how to decode instructions:
811  // - ADD/LDR/Nothing make there different patterns.
812  // - LDR/STR make two different patterns.
813  // Hence, 6 - 1 base patterns.
814  // (because ADRP-> Nothing -> STR is not simplifiable)
815
816  // The linker is only able to have a simple semantic, i.e., if pattern A
817  // do B.
818  // However, we want to see the opportunity we may miss if we were able to
819  // catch more complex cases.
820
821  // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
822  // A potential candidate becomes a candidate, if its current immediate
823  // operand is zero and all nodes of the chain have respectively only one user
824#ifdef DEBUG
825  SetOfMachineInstr DefsOfPotentialCandidates;
826#endif
827  for (const MachineInstr *Candidate : PotentialCandidates) {
828    // Get the definition of the candidate i.e., ADD or LDR.
829    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
830    // Record the elements of the chain.
831    const MachineInstr *L1 = Def;
832    const MachineInstr *L2 = nullptr;
833    unsigned ImmediateDefOpc = Def->getOpcode();
834    if (Def->getOpcode() != AArch64::ADRP) {
835      // Check the number of users of this node.
836      const SetOfMachineInstr *Users =
837          getUses(DefsPerColorToUses,
838                  RegToId.find(Def->getOperand(0).getReg())->second, *Def);
839      if (Users->size() > 1) {
840#ifdef DEBUG
841        // if all the uses of this def are in potential candidate, this is
842        // a complex candidate of level 2.
843        bool IsLevel2 = true;
844        for (const MachineInstr *MI : *Users) {
845          if (!PotentialCandidates.count(MI)) {
846            ++NumTooCplxLvl2;
847            IsLevel2 = false;
848            break;
849          }
850        }
851        if (IsLevel2)
852          ++NumCplxLvl2;
853#endif // DEBUG
854        PotentialADROpportunities.insert(Def);
855        continue;
856      }
857      L2 = Def;
858      Def = *UseToDefs.find(Def)->second.begin();
859      L1 = Def;
860    } // else the element in the middle of the chain is nothing, thus
861      // Def already contains the first element of the chain.
862
863    // Check the number of users of the first node in the chain, i.e., ADRP
864    const SetOfMachineInstr *Users =
865        getUses(DefsPerColorToUses,
866                RegToId.find(Def->getOperand(0).getReg())->second, *Def);
867    if (Users->size() > 1) {
868#ifdef DEBUG
869      // if all the uses of this def are in the defs of the potential candidate,
870      // this is a complex candidate of level 1
871      if (DefsOfPotentialCandidates.empty()) {
872        // lazy init
873        DefsOfPotentialCandidates = PotentialCandidates;
874        for (const MachineInstr *Candidate : PotentialCandidates) {
875          if (!UseToDefs.find(Candidate)->second.empty())
876            DefsOfPotentialCandidates.insert(
877                *UseToDefs.find(Candidate)->second.begin());
878        }
879      }
880      bool Found = false;
881      for (auto &Use : *Users) {
882        if (!DefsOfPotentialCandidates.count(Use)) {
883          ++NumTooCplxLvl1;
884          Found = true;
885          break;
886        }
887      }
888      if (!Found)
889        ++NumCplxLvl1;
890#endif // DEBUG
891      continue;
892    }
893
894    bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
895    // If the chain is three instructions long and ldr is the second element,
896    // then this ldr must load form GOT, otherwise this is not a correct chain.
897    if (L2 && !IsL2Add &&
898        !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
899      continue;
900    SmallVector<const MachineInstr *, 3> Args;
901    MCLOHType Kind;
902    if (isCandidateLoad(Candidate)) {
903      if (!L2) {
904        // At this point, the candidate LOH indicates that the ldr instruction
905        // may use a direct access to the symbol. There is not such encoding
906        // for loads of byte and half.
907        if (!supportLoadFromLiteral(Candidate))
908          continue;
909
910        DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
911                     << '\n');
912        Kind = MCLOH_AdrpLdr;
913        Args.push_back(L1);
914        Args.push_back(Candidate);
915        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
916               "L1 already involved in LOH.");
917        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
918               "Candidate already involved in LOH.");
919        ++NumADRPToLDR;
920      } else {
921        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
922                     << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
923                     << '\n');
924
925        Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
926        Args.push_back(L1);
927        Args.push_back(L2);
928        Args.push_back(Candidate);
929
930        PotentialADROpportunities.remove(L2);
931        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
932               "L1 already involved in LOH.");
933        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
934               "L2 already involved in LOH.");
935        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
936               "Candidate already involved in LOH.");
937#ifdef DEBUG
938        // get the immediate of the load
939        if (Candidate->getOperand(2).getImm() == 0)
940          if (ImmediateDefOpc == AArch64::ADDXri)
941            ++NumADDToLDR;
942          else
943            ++NumLDRToLDR;
944        else if (ImmediateDefOpc == AArch64::ADDXri)
945          ++NumADDToLDRWithImm;
946        else
947          ++NumLDRToLDRWithImm;
948#endif // DEBUG
949      }
950    } else {
951      if (ImmediateDefOpc == AArch64::ADRP)
952        continue;
953      else {
954
955        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
956                     << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
957                     << '\n');
958
959        Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
960        Args.push_back(L1);
961        Args.push_back(L2);
962        Args.push_back(Candidate);
963
964        PotentialADROpportunities.remove(L2);
965        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
966               "L1 already involved in LOH.");
967        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
968               "L2 already involved in LOH.");
969        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
970               "Candidate already involved in LOH.");
971#ifdef DEBUG
972        // get the immediate of the store
973        if (Candidate->getOperand(2).getImm() == 0)
974          if (ImmediateDefOpc == AArch64::ADDXri)
975            ++NumADDToSTR;
976          else
977            ++NumLDRToSTR;
978        else if (ImmediateDefOpc == AArch64::ADDXri)
979          ++NumADDToSTRWithImm;
980        else
981          ++NumLDRToSTRWithImm;
982#endif // DEBUG
983      }
984    }
985    AArch64FI.addLOHDirective(Kind, Args);
986  }
987
988  // Now, we grabbed all the big patterns, check ADR opportunities.
989  for (const MachineInstr *Candidate : PotentialADROpportunities)
990    registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
991                         InvolvedInLOHs, RegToId);
992}
993
994/// Look for every register defined by potential LOHs candidates.
995/// Map these registers with dense id in @p RegToId and vice-versa in
996/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
997static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
998                               MapIdToReg &IdToReg,
999                               const TargetRegisterInfo *TRI) {
1000  unsigned CurRegId = 0;
1001  if (!PreCollectRegister) {
1002    unsigned NbReg = TRI->getNumRegs();
1003    for (; CurRegId < NbReg; ++CurRegId) {
1004      RegToId[CurRegId] = CurRegId;
1005      DEBUG(IdToReg.push_back(CurRegId));
1006      DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1007    }
1008    return;
1009  }
1010
1011  DEBUG(dbgs() << "** Collect Involved Register\n");
1012  for (const auto &MBB : MF) {
1013    for (const MachineInstr &MI : MBB) {
1014      if (!canDefBePartOfLOH(&MI) &&
1015          !isCandidateLoad(&MI) && !isCandidateStore(&MI))
1016        continue;
1017
1018      // Process defs
1019      for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
1020                                            IOEnd = MI.operands_end();
1021           IO != IOEnd; ++IO) {
1022        if (!IO->isReg() || !IO->isDef())
1023          continue;
1024        unsigned CurReg = IO->getReg();
1025        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1026          if (RegToId.find(*AI) == RegToId.end()) {
1027            DEBUG(IdToReg.push_back(*AI);
1028                  assert(IdToReg[CurRegId] == *AI &&
1029                         "Reg index mismatches insertion index."));
1030            RegToId[*AI] = CurRegId++;
1031            DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1032          }
1033      }
1034    }
1035  }
1036}
1037
1038bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
1039  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1040  const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1041
1042  MapRegToId RegToId;
1043  MapIdToReg IdToReg;
1044  AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
1045  assert(AArch64FI && "No MachineFunctionInfo for this function!");
1046
1047  DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
1048
1049  collectInvolvedReg(MF, RegToId, IdToReg, TRI);
1050  if (RegToId.empty())
1051    return false;
1052
1053  MachineInstr *DummyOp = nullptr;
1054  if (BasicBlockScopeOnly) {
1055    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1056    // For local analysis, create a dummy operation to record uses that are not
1057    // local.
1058    DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
1059  }
1060
1061  unsigned NbReg = RegToId.size();
1062  bool Modified = false;
1063
1064  // Start with ADRP.
1065  InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
1066
1067  // Compute the reaching def in ADRP mode, meaning ADRP definitions
1068  // are first considered as uses.
1069  reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
1070  DEBUG(dbgs() << "ADRP reaching defs\n");
1071  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1072
1073  // Translate the definition to uses map into a use to definitions map to ease
1074  // statistic computation.
1075  InstrToInstrs ADRPToReachingDefs;
1076  reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1077
1078  // Compute LOH for ADRP.
1079  computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
1080  delete[] ColorOpToReachedUses;
1081
1082  // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
1083  ColorOpToReachedUses = new InstrToInstrs[NbReg];
1084
1085  // first perform a regular reaching def analysis.
1086  reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
1087  DEBUG(dbgs() << "All reaching defs\n");
1088  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1089
1090  // Turn that into a use to defs to ease statistic computation.
1091  InstrToInstrs UsesToReachingDefs;
1092  reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1093
1094  // Compute other than AdrpAdrp LOH.
1095  computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
1096                MDT);
1097  delete[] ColorOpToReachedUses;
1098
1099  if (BasicBlockScopeOnly)
1100    MF.DeleteMachineInstr(DummyOp);
1101
1102  return Modified;
1103}
1104
1105/// createAArch64CollectLOHPass - returns an instance of the Statistic for
1106/// linker optimization pass.
1107FunctionPass *llvm::createAArch64CollectLOHPass() {
1108  return new AArch64CollectLOH();
1109}
1110