1//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that collect the Linker Optimization Hint (LOH). 11// This pass should be run at the very end of the compilation flow, just before 12// assembly printer. 13// To be useful for the linker, the LOH must be printed into the assembly file. 14// 15// A LOH describes a sequence of instructions that may be optimized by the 16// linker. 17// This same sequence cannot be optimized by the compiler because some of 18// the information will be known at link time. 19// For instance, consider the following sequence: 20// L1: adrp xA, sym@PAGE 21// L2: add xB, xA, sym@PAGEOFF 22// L3: ldr xC, [xB, #imm] 23// This sequence can be turned into: 24// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB: 25// L3: ldr xC, sym+#imm 26// It may also be turned into either the following more efficient 27// code sequences: 28// - If sym@PAGEOFF + #imm fits the encoding space of L3. 29// L1: adrp xA, sym@PAGE 30// L3: ldr xC, [xB, sym@PAGEOFF + #imm] 31// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB: 32// L1: adr xA, sym 33// L3: ldr xC, [xB, #imm] 34// 35// To be valid a LOH must meet all the requirements needed by all the related 36// possible linker transformations. 37// For instance, using the running example, the constraints to emit 38// ".loh AdrpAddLdr" are: 39// - L1, L2, and L3 instructions are of the expected type, i.e., 40// respectively ADRP, ADD (immediate), and LD. 41// - The result of L1 is used only by L2. 42// - The register argument (xA) used in the ADD instruction is defined 43// only by L1. 44// - The result of L2 is used only by L3. 45// - The base address (xB) in L3 is defined only L2. 46// - The ADRP in L1 and the ADD in L2 must reference the same symbol using 47// @PAGE/@PAGEOFF with no additional constants 48// 49// Currently supported LOHs are: 50// * So called non-ADRP-related: 51// - .loh AdrpAddLdr L1, L2, L3: 52// L1: adrp xA, sym@PAGE 53// L2: add xB, xA, sym@PAGEOFF 54// L3: ldr xC, [xB, #imm] 55// - .loh AdrpLdrGotLdr L1, L2, L3: 56// L1: adrp xA, sym@GOTPAGE 57// L2: ldr xB, [xA, sym@GOTPAGEOFF] 58// L3: ldr xC, [xB, #imm] 59// - .loh AdrpLdr L1, L3: 60// L1: adrp xA, sym@PAGE 61// L3: ldr xC, [xA, sym@PAGEOFF] 62// - .loh AdrpAddStr L1, L2, L3: 63// L1: adrp xA, sym@PAGE 64// L2: add xB, xA, sym@PAGEOFF 65// L3: str xC, [xB, #imm] 66// - .loh AdrpLdrGotStr L1, L2, L3: 67// L1: adrp xA, sym@GOTPAGE 68// L2: ldr xB, [xA, sym@GOTPAGEOFF] 69// L3: str xC, [xB, #imm] 70// - .loh AdrpAdd L1, L2: 71// L1: adrp xA, sym@PAGE 72// L2: add xB, xA, sym@PAGEOFF 73// For all these LOHs, L1, L2, L3 form a simple chain: 74// L1 result is used only by L2 and L2 result by L3. 75// L3 LOH-related argument is defined only by L2 and L2 LOH-related argument 76// by L1. 77// All these LOHs aim at using more efficient load/store patterns by folding 78// some instructions used to compute the address directly into the load/store. 79// 80// * So called ADRP-related: 81// - .loh AdrpAdrp L2, L1: 82// L2: ADRP xA, sym1@PAGE 83// L1: ADRP xA, sym2@PAGE 84// L2 dominates L1 and xA is not redifined between L2 and L1 85// This LOH aims at getting rid of redundant ADRP instructions. 86// 87// The overall design for emitting the LOHs is: 88// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo. 89// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it: 90// 1. Associates them a label. 91// 2. Emits them in a MCStreamer (EmitLOHDirective). 92// - The MCMachOStreamer records them into the MCAssembler. 93// - The MCAsmStreamer prints them. 94// - Other MCStreamers ignore them. 95// 3. Closes the MCStreamer: 96// - The MachObjectWriter gets them from the MCAssembler and writes 97// them in the object file. 98// - Other ObjectWriters ignore them. 99//===----------------------------------------------------------------------===// 100 101#include "AArch64.h" 102#include "AArch64InstrInfo.h" 103#include "AArch64MachineFunctionInfo.h" 104#include "AArch64Subtarget.h" 105#include "MCTargetDesc/AArch64AddressingModes.h" 106#include "llvm/ADT/BitVector.h" 107#include "llvm/ADT/DenseMap.h" 108#include "llvm/ADT/MapVector.h" 109#include "llvm/ADT/SetVector.h" 110#include "llvm/ADT/SmallVector.h" 111#include "llvm/ADT/Statistic.h" 112#include "llvm/CodeGen/MachineBasicBlock.h" 113#include "llvm/CodeGen/MachineDominators.h" 114#include "llvm/CodeGen/MachineFunctionPass.h" 115#include "llvm/CodeGen/MachineInstr.h" 116#include "llvm/CodeGen/MachineInstrBuilder.h" 117#include "llvm/Support/CommandLine.h" 118#include "llvm/Support/Debug.h" 119#include "llvm/Support/ErrorHandling.h" 120#include "llvm/Support/raw_ostream.h" 121#include "llvm/Target/TargetInstrInfo.h" 122#include "llvm/Target/TargetMachine.h" 123#include "llvm/Target/TargetRegisterInfo.h" 124using namespace llvm; 125 126#define DEBUG_TYPE "aarch64-collect-loh" 127 128static cl::opt<bool> 129PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, 130 cl::desc("Restrict analysis to registers invovled" 131 " in LOHs"), 132 cl::init(true)); 133 134static cl::opt<bool> 135BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden, 136 cl::desc("Restrict analysis at basic block scope"), 137 cl::init(true)); 138 139STATISTIC(NumADRPSimpleCandidate, 140 "Number of simplifiable ADRP dominate by another"); 141STATISTIC(NumADRPComplexCandidate2, 142 "Number of simplifiable ADRP reachable by 2 defs"); 143STATISTIC(NumADRPComplexCandidate3, 144 "Number of simplifiable ADRP reachable by 3 defs"); 145STATISTIC(NumADRPComplexCandidateOther, 146 "Number of simplifiable ADRP reachable by 4 or more defs"); 147STATISTIC(NumADDToSTRWithImm, 148 "Number of simplifiable STR with imm reachable by ADD"); 149STATISTIC(NumLDRToSTRWithImm, 150 "Number of simplifiable STR with imm reachable by LDR"); 151STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); 152STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); 153STATISTIC(NumADDToLDRWithImm, 154 "Number of simplifiable LDR with imm reachable by ADD"); 155STATISTIC(NumLDRToLDRWithImm, 156 "Number of simplifiable LDR with imm reachable by LDR"); 157STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); 158STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); 159STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); 160STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); 161STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); 162STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); 163STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); 164STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); 165STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); 166 167namespace llvm { 168void initializeAArch64CollectLOHPass(PassRegistry &); 169} 170 171#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" 172 173namespace { 174struct AArch64CollectLOH : public MachineFunctionPass { 175 static char ID; 176 AArch64CollectLOH() : MachineFunctionPass(ID) { 177 initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry()); 178 } 179 180 bool runOnMachineFunction(MachineFunction &MF) override; 181 182 const char *getPassName() const override { 183 return AARCH64_COLLECT_LOH_NAME; 184 } 185 186 void getAnalysisUsage(AnalysisUsage &AU) const override { 187 AU.setPreservesAll(); 188 MachineFunctionPass::getAnalysisUsage(AU); 189 AU.addRequired<MachineDominatorTree>(); 190 } 191 192private: 193}; 194 195/// A set of MachineInstruction. 196typedef SetVector<const MachineInstr *> SetOfMachineInstr; 197/// Map a basic block to a set of instructions per register. 198/// This is used to represent the exposed uses of a basic block 199/// per register. 200typedef MapVector<const MachineBasicBlock *, 201 std::unique_ptr<SetOfMachineInstr[]>> 202BlockToSetOfInstrsPerColor; 203/// Map a basic block to an instruction per register. 204/// This is used to represent the live-out definitions of a basic block 205/// per register. 206typedef MapVector<const MachineBasicBlock *, 207 std::unique_ptr<const MachineInstr *[]>> 208BlockToInstrPerColor; 209/// Map an instruction to a set of instructions. Used to represent the 210/// mapping def to reachable uses or use to definitions. 211typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs; 212/// Map a basic block to a BitVector. 213/// This is used to record the kill registers per basic block. 214typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet; 215 216/// Map a register to a dense id. 217typedef DenseMap<unsigned, unsigned> MapRegToId; 218/// Map a dense id to a register. Used for debug purposes. 219typedef SmallVector<unsigned, 32> MapIdToReg; 220} // end anonymous namespace. 221 222char AArch64CollectLOH::ID = 0; 223 224INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", 225 AARCH64_COLLECT_LOH_NAME, false, false) 226INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 227INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", 228 AARCH64_COLLECT_LOH_NAME, false, false) 229 230/// Given a couple (MBB, reg) get the corresponding set of instruction from 231/// the given "sets". 232/// If this couple does not reference any set, an empty set is added to "sets" 233/// for this couple and returned. 234/// \param nbRegs is used internally allocate some memory. It must be consistent 235/// with the way sets is used. 236static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, 237 const MachineBasicBlock &MBB, unsigned reg, 238 unsigned nbRegs) { 239 SetOfMachineInstr *result; 240 BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB); 241 if (it != sets.end()) 242 result = it->second.get(); 243 else 244 result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get(); 245 246 return result[reg]; 247} 248 249/// Given a couple (reg, MI) get the corresponding set of instructions from the 250/// the given "sets". 251/// This is used to get the uses record in sets of a definition identified by 252/// MI and reg, i.e., MI defines reg. 253/// If the couple does not reference anything, an empty set is added to 254/// "sets[reg]". 255/// \pre set[reg] is valid. 256static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, 257 const MachineInstr &MI) { 258 return sets[reg][&MI]; 259} 260 261/// Same as getUses but does not modify the input map: sets. 262/// \return NULL if the couple (reg, MI) is not in sets. 263static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, 264 const MachineInstr &MI) { 265 InstrToInstrs::const_iterator Res = sets[reg].find(&MI); 266 if (Res != sets[reg].end()) 267 return &(Res->second); 268 return nullptr; 269} 270 271/// Initialize the reaching definition algorithm: 272/// For each basic block BB in MF, record: 273/// - its kill set. 274/// - its reachable uses (uses that are exposed to BB's predecessors). 275/// - its the generated definitions. 276/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to 277/// the list of uses of exposed defintions. 278/// \param ADRPMode specifies to only consider ADRP instructions for generated 279/// definition. It also consider definitions of ADRP instructions as uses and 280/// ignore other uses. The ADRPMode is used to collect the information for LHO 281/// that involve ADRP operation only. 282static void initReachingDef(const MachineFunction &MF, 283 InstrToInstrs *ColorOpToReachedUses, 284 BlockToInstrPerColor &Gen, BlockToRegSet &Kill, 285 BlockToSetOfInstrsPerColor &ReachableUses, 286 const MapRegToId &RegToId, 287 const MachineInstr *DummyOp, bool ADRPMode) { 288 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 289 unsigned NbReg = RegToId.size(); 290 291 for (const MachineBasicBlock &MBB : MF) { 292 auto &BBGen = Gen[&MBB]; 293 BBGen = make_unique<const MachineInstr *[]>(NbReg); 294 std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr); 295 296 BitVector &BBKillSet = Kill[&MBB]; 297 BBKillSet.resize(NbReg); 298 for (const MachineInstr &MI : MBB) { 299 bool IsADRP = MI.getOpcode() == AArch64::ADRP; 300 301 // Process uses first. 302 if (IsADRP || !ADRPMode) 303 for (const MachineOperand &MO : MI.operands()) { 304 // Treat ADRP def as use, as the goal of the analysis is to find 305 // ADRP defs reached by other ADRP defs. 306 if (!MO.isReg() || (!ADRPMode && !MO.isUse()) || 307 (ADRPMode && (!IsADRP || !MO.isDef()))) 308 continue; 309 unsigned CurReg = MO.getReg(); 310 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); 311 if (ItCurRegId == RegToId.end()) 312 continue; 313 CurReg = ItCurRegId->second; 314 315 // if CurReg has not been defined, this use is reachable. 316 if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) 317 getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI); 318 // current basic block definition for this color, if any, is in Gen. 319 if (BBGen[CurReg]) 320 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); 321 } 322 323 // Process clobbers. 324 for (const MachineOperand &MO : MI.operands()) { 325 if (!MO.isRegMask()) 326 continue; 327 // Clobbers kill the related colors. 328 const uint32_t *PreservedRegs = MO.getRegMask(); 329 330 // Set generated regs. 331 for (const auto &Entry : RegToId) { 332 unsigned Reg = Entry.second; 333 // Use the global register ID when querying APIs external to this 334 // pass. 335 if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { 336 // Do not register clobbered definition for no ADRP. 337 // This definition is not used anyway (otherwise register 338 // allocation is wrong). 339 BBGen[Reg] = ADRPMode ? &MI : nullptr; 340 BBKillSet.set(Reg); 341 } 342 } 343 } 344 345 // Process register defs. 346 for (const MachineOperand &MO : MI.operands()) { 347 if (!MO.isReg() || !MO.isDef()) 348 continue; 349 unsigned CurReg = MO.getReg(); 350 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); 351 if (ItCurRegId == RegToId.end()) 352 continue; 353 354 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { 355 MapRegToId::const_iterator ItRegId = RegToId.find(*AI); 356 // If this alias has not been recorded, then it is not interesting 357 // for the current analysis. 358 // We can end up in this situation because of tuple registers. 359 // E.g., Let say we are interested in S1. When we register 360 // S1, we will also register its aliases and in particular 361 // the tuple Q1_Q2. 362 // Now, when we encounter Q1_Q2, we will look through its aliases 363 // and will find that S2 is not registered. 364 if (ItRegId == RegToId.end()) 365 continue; 366 367 BBKillSet.set(ItRegId->second); 368 BBGen[ItRegId->second] = &MI; 369 } 370 BBGen[ItCurRegId->second] = &MI; 371 } 372 } 373 374 // If we restrict our analysis to basic block scope, conservatively add a 375 // dummy 376 // use for each generated value. 377 if (!ADRPMode && DummyOp && !MBB.succ_empty()) 378 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) 379 if (BBGen[CurReg]) 380 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp); 381 } 382} 383 384/// Reaching def core algorithm: 385/// while an Out has changed 386/// for each bb 387/// for each color 388/// In[bb][color] = U Out[bb.predecessors][color] 389/// insert reachableUses[bb][color] in each in[bb][color] 390/// op.reachedUses 391/// 392/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) 393static void reachingDefAlgorithm(const MachineFunction &MF, 394 InstrToInstrs *ColorOpToReachedUses, 395 BlockToSetOfInstrsPerColor &In, 396 BlockToSetOfInstrsPerColor &Out, 397 BlockToInstrPerColor &Gen, BlockToRegSet &Kill, 398 BlockToSetOfInstrsPerColor &ReachableUses, 399 unsigned NbReg) { 400 bool HasChanged; 401 do { 402 HasChanged = false; 403 for (const MachineBasicBlock &MBB : MF) { 404 unsigned CurReg; 405 for (CurReg = 0; CurReg < NbReg; ++CurReg) { 406 SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); 407 SetOfMachineInstr &BBReachableUses = 408 getSet(ReachableUses, MBB, CurReg, NbReg); 409 SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); 410 unsigned Size = BBOutSet.size(); 411 // In[bb][color] = U Out[bb.predecessors][color] 412 for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { 413 SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); 414 BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); 415 } 416 // insert reachableUses[bb][color] in each in[bb][color] op.reachedses 417 for (const MachineInstr *MI : BBInSet) { 418 SetOfMachineInstr &OpReachedUses = 419 getUses(ColorOpToReachedUses, CurReg, *MI); 420 OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); 421 } 422 // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) 423 if (!Kill[&MBB].test(CurReg)) 424 BBOutSet.insert(BBInSet.begin(), BBInSet.end()); 425 if (Gen[&MBB][CurReg]) 426 BBOutSet.insert(Gen[&MBB][CurReg]); 427 HasChanged |= BBOutSet.size() != Size; 428 } 429 } 430 } while (HasChanged); 431} 432 433/// Reaching definition algorithm. 434/// \param MF function on which the algorithm will operate. 435/// \param[out] ColorOpToReachedUses will contain the result of the reaching 436/// def algorithm. 437/// \param ADRPMode specify whether the reaching def algorithm should be tuned 438/// for ADRP optimization. \see initReachingDef for more details. 439/// \param DummyOp if not NULL, the algorithm will work at 440/// basic block scope and will set for every exposed definition a use to 441/// @p DummyOp. 442/// \pre ColorOpToReachedUses is an array of at least number of registers of 443/// InstrToInstrs. 444static void reachingDef(const MachineFunction &MF, 445 InstrToInstrs *ColorOpToReachedUses, 446 const MapRegToId &RegToId, bool ADRPMode = false, 447 const MachineInstr *DummyOp = nullptr) { 448 // structures: 449 // For each basic block. 450 // Out: a set per color of definitions that reach the 451 // out boundary of this block. 452 // In: Same as Out but for in boundary. 453 // Gen: generated color in this block (one operation per color). 454 // Kill: register set of killed color in this block. 455 // ReachableUses: a set per color of uses (operation) reachable 456 // for "In" definitions. 457 BlockToSetOfInstrsPerColor Out, In, ReachableUses; 458 BlockToInstrPerColor Gen; 459 BlockToRegSet Kill; 460 461 // Initialize Gen, kill and reachableUses. 462 initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, 463 DummyOp, ADRPMode); 464 465 // Algo. 466 if (!DummyOp) 467 reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, 468 ReachableUses, RegToId.size()); 469} 470 471#ifndef NDEBUG 472/// print the result of the reaching definition algorithm. 473static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, 474 unsigned NbReg, const TargetRegisterInfo *TRI, 475 const MapIdToReg &IdToReg) { 476 unsigned CurReg; 477 for (CurReg = 0; CurReg < NbReg; ++CurReg) { 478 if (ColorOpToReachedUses[CurReg].empty()) 479 continue; 480 DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); 481 482 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { 483 DEBUG(dbgs() << "Def:\n"); 484 DEBUG(DefsIt.first->print(dbgs())); 485 DEBUG(dbgs() << "Reachable uses:\n"); 486 for (const MachineInstr *MI : DefsIt.second) { 487 DEBUG(MI->print(dbgs())); 488 } 489 } 490 } 491} 492#endif // NDEBUG 493 494/// Answer the following question: Can Def be one of the definition 495/// involved in a part of a LOH? 496static bool canDefBePartOfLOH(const MachineInstr *Def) { 497 unsigned Opc = Def->getOpcode(); 498 // Accept ADRP, ADDLow and LOADGot. 499 switch (Opc) { 500 default: 501 return false; 502 case AArch64::ADRP: 503 return true; 504 case AArch64::ADDXri: 505 // Check immediate to see if the immediate is an address. 506 switch (Def->getOperand(2).getType()) { 507 default: 508 return false; 509 case MachineOperand::MO_GlobalAddress: 510 case MachineOperand::MO_JumpTableIndex: 511 case MachineOperand::MO_ConstantPoolIndex: 512 case MachineOperand::MO_BlockAddress: 513 return true; 514 } 515 case AArch64::LDRXui: 516 // Check immediate to see if the immediate is an address. 517 switch (Def->getOperand(2).getType()) { 518 default: 519 return false; 520 case MachineOperand::MO_GlobalAddress: 521 return true; 522 } 523 } 524 // Unreachable. 525 return false; 526} 527 528/// Check whether the given instruction can the end of a LOH chain involving a 529/// store. 530static bool isCandidateStore(const MachineInstr *Instr) { 531 switch (Instr->getOpcode()) { 532 default: 533 return false; 534 case AArch64::STRBBui: 535 case AArch64::STRHHui: 536 case AArch64::STRBui: 537 case AArch64::STRHui: 538 case AArch64::STRWui: 539 case AArch64::STRXui: 540 case AArch64::STRSui: 541 case AArch64::STRDui: 542 case AArch64::STRQui: 543 // In case we have str xA, [xA, #imm], this is two different uses 544 // of xA and we cannot fold, otherwise the xA stored may be wrong, 545 // even if #imm == 0. 546 if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) 547 return true; 548 } 549 return false; 550} 551 552/// Given the result of a reaching definition algorithm in ColorOpToReachedUses, 553/// Build the Use to Defs information and filter out obvious non-LOH candidates. 554/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. 555/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, 556/// i.e., no simple chain. 557/// \param ADRPMode -- \see initReachingDef. 558static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, 559 const InstrToInstrs *ColorOpToReachedUses, 560 const MapRegToId &RegToId, 561 bool ADRPMode = false) { 562 563 SetOfMachineInstr NotCandidate; 564 unsigned NbReg = RegToId.size(); 565 MapRegToId::const_iterator EndIt = RegToId.end(); 566 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { 567 // If this color is never defined, continue. 568 if (ColorOpToReachedUses[CurReg].empty()) 569 continue; 570 571 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { 572 for (const MachineInstr *MI : DefsIt.second) { 573 const MachineInstr *Def = DefsIt.first; 574 MapRegToId::const_iterator It; 575 // if all the reaching defs are not adrp, this use will not be 576 // simplifiable. 577 if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || 578 (!ADRPMode && !canDefBePartOfLOH(Def)) || 579 (!ADRPMode && isCandidateStore(MI) && 580 // store are LOH candidate iff the end of the chain is used as 581 // base. 582 ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt || 583 It->second != CurReg))) { 584 NotCandidate.insert(MI); 585 continue; 586 } 587 // Do not consider self reaching as a simplifiable case for ADRP. 588 if (!ADRPMode || MI != DefsIt.first) { 589 UseToReachingDefs[MI].insert(DefsIt.first); 590 // If UsesIt has several reaching definitions, it is not 591 // candidate for simplificaton in non-ADRPMode. 592 if (!ADRPMode && UseToReachingDefs[MI].size() > 1) 593 NotCandidate.insert(MI); 594 } 595 } 596 } 597 } 598 for (const MachineInstr *Elem : NotCandidate) { 599 DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); 600 // It would have been better if we could just remove the entry 601 // from the map. Because of that, we have to filter the garbage 602 // (second.empty) in the subsequence analysis. 603 UseToReachingDefs[Elem].clear(); 604 } 605} 606 607/// Based on the use to defs information (in ADRPMode), compute the 608/// opportunities of LOH ADRP-related. 609static void computeADRP(const InstrToInstrs &UseToDefs, 610 AArch64FunctionInfo &AArch64FI, 611 const MachineDominatorTree *MDT) { 612 DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); 613 for (const auto &Entry : UseToDefs) { 614 unsigned Size = Entry.second.size(); 615 if (Size == 0) 616 continue; 617 if (Size == 1) { 618 const MachineInstr *L2 = *Entry.second.begin(); 619 const MachineInstr *L1 = Entry.first; 620 if (!MDT->dominates(L2, L1)) { 621 DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 622 << '\n'); 623 continue; 624 } 625 DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); 626 SmallVector<const MachineInstr *, 2> Args; 627 Args.push_back(L2); 628 Args.push_back(L1); 629 AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); 630 ++NumADRPSimpleCandidate; 631 } 632#ifdef DEBUG 633 else if (Size == 2) 634 ++NumADRPComplexCandidate2; 635 else if (Size == 3) 636 ++NumADRPComplexCandidate3; 637 else 638 ++NumADRPComplexCandidateOther; 639#endif 640 // if Size < 1, the use should have been removed from the candidates 641 assert(Size >= 1 && "No reaching defs for that use!"); 642 } 643} 644 645/// Check whether the given instruction can be the end of a LOH chain 646/// involving a load. 647static bool isCandidateLoad(const MachineInstr *Instr) { 648 switch (Instr->getOpcode()) { 649 default: 650 return false; 651 case AArch64::LDRSBWui: 652 case AArch64::LDRSBXui: 653 case AArch64::LDRSHWui: 654 case AArch64::LDRSHXui: 655 case AArch64::LDRSWui: 656 case AArch64::LDRBui: 657 case AArch64::LDRHui: 658 case AArch64::LDRWui: 659 case AArch64::LDRXui: 660 case AArch64::LDRSui: 661 case AArch64::LDRDui: 662 case AArch64::LDRQui: 663 if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) 664 return false; 665 return true; 666 } 667 // Unreachable. 668 return false; 669} 670 671/// Check whether the given instruction can load a litteral. 672static bool supportLoadFromLiteral(const MachineInstr *Instr) { 673 switch (Instr->getOpcode()) { 674 default: 675 return false; 676 case AArch64::LDRSWui: 677 case AArch64::LDRWui: 678 case AArch64::LDRXui: 679 case AArch64::LDRSui: 680 case AArch64::LDRDui: 681 case AArch64::LDRQui: 682 return true; 683 } 684 // Unreachable. 685 return false; 686} 687 688/// Check whether the given instruction is a LOH candidate. 689/// \param UseToDefs is used to check that Instr is at the end of LOH supported 690/// chain. 691/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are 692/// already been filtered out. 693static bool isCandidate(const MachineInstr *Instr, 694 const InstrToInstrs &UseToDefs, 695 const MachineDominatorTree *MDT) { 696 if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) 697 return false; 698 699 const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); 700 if (Def->getOpcode() != AArch64::ADRP) { 701 // At this point, Def is ADDXri or LDRXui of the right type of 702 // symbol, because we filtered out the uses that were not defined 703 // by these kind of instructions (+ ADRP). 704 705 // Check if this forms a simple chain: each intermediate node must 706 // dominates the next one. 707 if (!MDT->dominates(Def, Instr)) 708 return false; 709 // Move one node up in the simple chain. 710 if (UseToDefs.find(Def) == 711 UseToDefs.end() 712 // The map may contain garbage we have to ignore. 713 || 714 UseToDefs.find(Def)->second.empty()) 715 return false; 716 Instr = Def; 717 Def = *UseToDefs.find(Def)->second.begin(); 718 } 719 // Check if we reached the top of the simple chain: 720 // - top is ADRP. 721 // - check the simple chain property: each intermediate node must 722 // dominates the next one. 723 if (Def->getOpcode() == AArch64::ADRP) 724 return MDT->dominates(Def, Instr); 725 return false; 726} 727 728static bool registerADRCandidate(const MachineInstr &Use, 729 const InstrToInstrs &UseToDefs, 730 const InstrToInstrs *DefsPerColorToUses, 731 AArch64FunctionInfo &AArch64FI, 732 SetOfMachineInstr *InvolvedInLOHs, 733 const MapRegToId &RegToId) { 734 // Look for opportunities to turn ADRP -> ADD or 735 // ADRP -> LDR GOTPAGEOFF into ADR. 736 // If ADRP has more than one use. Give up. 737 if (Use.getOpcode() != AArch64::ADDXri && 738 (Use.getOpcode() != AArch64::LDRXui || 739 !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) 740 return false; 741 InstrToInstrs::const_iterator It = UseToDefs.find(&Use); 742 // The map may contain garbage that we need to ignore. 743 if (It == UseToDefs.end() || It->second.empty()) 744 return false; 745 const MachineInstr &Def = **It->second.begin(); 746 if (Def.getOpcode() != AArch64::ADRP) 747 return false; 748 // Check the number of users of ADRP. 749 const SetOfMachineInstr *Users = 750 getUses(DefsPerColorToUses, 751 RegToId.find(Def.getOperand(0).getReg())->second, Def); 752 if (Users->size() > 1) { 753 ++NumADRComplexCandidate; 754 return false; 755 } 756 ++NumADRSimpleCandidate; 757 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) && 758 "ADRP already involved in LOH."); 759 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) && 760 "ADD already involved in LOH."); 761 DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n'); 762 763 SmallVector<const MachineInstr *, 2> Args; 764 Args.push_back(&Def); 765 Args.push_back(&Use); 766 767 AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd 768 : MCLOH_AdrpLdrGot, 769 Args); 770 return true; 771} 772 773/// Based on the use to defs information (in non-ADRPMode), compute the 774/// opportunities of LOH non-ADRP-related 775static void computeOthers(const InstrToInstrs &UseToDefs, 776 const InstrToInstrs *DefsPerColorToUses, 777 AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, 778 const MachineDominatorTree *MDT) { 779 SetOfMachineInstr *InvolvedInLOHs = nullptr; 780#ifdef DEBUG 781 SetOfMachineInstr InvolvedInLOHsStorage; 782 InvolvedInLOHs = &InvolvedInLOHsStorage; 783#endif // DEBUG 784 DEBUG(dbgs() << "*** Compute LOH for Others\n"); 785 // ADRP -> ADD/LDR -> LDR/STR pattern. 786 // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. 787 788 // FIXME: When the statistics are not important, 789 // This initial filtering loop can be merged into the next loop. 790 // Currently, we didn't do it to have the same code for both DEBUG and 791 // NDEBUG builds. Indeed, the iterator of the second loop would need 792 // to be changed. 793 SetOfMachineInstr PotentialCandidates; 794 SetOfMachineInstr PotentialADROpportunities; 795 for (auto &Use : UseToDefs) { 796 // If no definition is available, this is a non candidate. 797 if (Use.second.empty()) 798 continue; 799 // Keep only instructions that are load or store and at the end of 800 // a ADRP -> ADD/LDR/Nothing chain. 801 // We already filtered out the no-chain cases. 802 if (!isCandidate(Use.first, UseToDefs, MDT)) { 803 PotentialADROpportunities.insert(Use.first); 804 continue; 805 } 806 PotentialCandidates.insert(Use.first); 807 } 808 809 // Make the following distinctions for statistics as the linker does 810 // know how to decode instructions: 811 // - ADD/LDR/Nothing make there different patterns. 812 // - LDR/STR make two different patterns. 813 // Hence, 6 - 1 base patterns. 814 // (because ADRP-> Nothing -> STR is not simplifiable) 815 816 // The linker is only able to have a simple semantic, i.e., if pattern A 817 // do B. 818 // However, we want to see the opportunity we may miss if we were able to 819 // catch more complex cases. 820 821 // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> 822 // A potential candidate becomes a candidate, if its current immediate 823 // operand is zero and all nodes of the chain have respectively only one user 824#ifdef DEBUG 825 SetOfMachineInstr DefsOfPotentialCandidates; 826#endif 827 for (const MachineInstr *Candidate : PotentialCandidates) { 828 // Get the definition of the candidate i.e., ADD or LDR. 829 const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); 830 // Record the elements of the chain. 831 const MachineInstr *L1 = Def; 832 const MachineInstr *L2 = nullptr; 833 unsigned ImmediateDefOpc = Def->getOpcode(); 834 if (Def->getOpcode() != AArch64::ADRP) { 835 // Check the number of users of this node. 836 const SetOfMachineInstr *Users = 837 getUses(DefsPerColorToUses, 838 RegToId.find(Def->getOperand(0).getReg())->second, *Def); 839 if (Users->size() > 1) { 840#ifdef DEBUG 841 // if all the uses of this def are in potential candidate, this is 842 // a complex candidate of level 2. 843 bool IsLevel2 = true; 844 for (const MachineInstr *MI : *Users) { 845 if (!PotentialCandidates.count(MI)) { 846 ++NumTooCplxLvl2; 847 IsLevel2 = false; 848 break; 849 } 850 } 851 if (IsLevel2) 852 ++NumCplxLvl2; 853#endif // DEBUG 854 PotentialADROpportunities.insert(Def); 855 continue; 856 } 857 L2 = Def; 858 Def = *UseToDefs.find(Def)->second.begin(); 859 L1 = Def; 860 } // else the element in the middle of the chain is nothing, thus 861 // Def already contains the first element of the chain. 862 863 // Check the number of users of the first node in the chain, i.e., ADRP 864 const SetOfMachineInstr *Users = 865 getUses(DefsPerColorToUses, 866 RegToId.find(Def->getOperand(0).getReg())->second, *Def); 867 if (Users->size() > 1) { 868#ifdef DEBUG 869 // if all the uses of this def are in the defs of the potential candidate, 870 // this is a complex candidate of level 1 871 if (DefsOfPotentialCandidates.empty()) { 872 // lazy init 873 DefsOfPotentialCandidates = PotentialCandidates; 874 for (const MachineInstr *Candidate : PotentialCandidates) { 875 if (!UseToDefs.find(Candidate)->second.empty()) 876 DefsOfPotentialCandidates.insert( 877 *UseToDefs.find(Candidate)->second.begin()); 878 } 879 } 880 bool Found = false; 881 for (auto &Use : *Users) { 882 if (!DefsOfPotentialCandidates.count(Use)) { 883 ++NumTooCplxLvl1; 884 Found = true; 885 break; 886 } 887 } 888 if (!Found) 889 ++NumCplxLvl1; 890#endif // DEBUG 891 continue; 892 } 893 894 bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); 895 // If the chain is three instructions long and ldr is the second element, 896 // then this ldr must load form GOT, otherwise this is not a correct chain. 897 if (L2 && !IsL2Add && 898 !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)) 899 continue; 900 SmallVector<const MachineInstr *, 3> Args; 901 MCLOHType Kind; 902 if (isCandidateLoad(Candidate)) { 903 if (!L2) { 904 // At this point, the candidate LOH indicates that the ldr instruction 905 // may use a direct access to the symbol. There is not such encoding 906 // for loads of byte and half. 907 if (!supportLoadFromLiteral(Candidate)) 908 continue; 909 910 DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate 911 << '\n'); 912 Kind = MCLOH_AdrpLdr; 913 Args.push_back(L1); 914 Args.push_back(Candidate); 915 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && 916 "L1 already involved in LOH."); 917 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && 918 "Candidate already involved in LOH."); 919 ++NumADRPToLDR; 920 } else { 921 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") 922 << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate 923 << '\n'); 924 925 Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; 926 Args.push_back(L1); 927 Args.push_back(L2); 928 Args.push_back(Candidate); 929 930 PotentialADROpportunities.remove(L2); 931 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && 932 "L1 already involved in LOH."); 933 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && 934 "L2 already involved in LOH."); 935 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && 936 "Candidate already involved in LOH."); 937#ifdef DEBUG 938 // get the immediate of the load 939 if (Candidate->getOperand(2).getImm() == 0) 940 if (ImmediateDefOpc == AArch64::ADDXri) 941 ++NumADDToLDR; 942 else 943 ++NumLDRToLDR; 944 else if (ImmediateDefOpc == AArch64::ADDXri) 945 ++NumADDToLDRWithImm; 946 else 947 ++NumLDRToLDRWithImm; 948#endif // DEBUG 949 } 950 } else { 951 if (ImmediateDefOpc == AArch64::ADRP) 952 continue; 953 else { 954 955 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") 956 << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate 957 << '\n'); 958 959 Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; 960 Args.push_back(L1); 961 Args.push_back(L2); 962 Args.push_back(Candidate); 963 964 PotentialADROpportunities.remove(L2); 965 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && 966 "L1 already involved in LOH."); 967 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && 968 "L2 already involved in LOH."); 969 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && 970 "Candidate already involved in LOH."); 971#ifdef DEBUG 972 // get the immediate of the store 973 if (Candidate->getOperand(2).getImm() == 0) 974 if (ImmediateDefOpc == AArch64::ADDXri) 975 ++NumADDToSTR; 976 else 977 ++NumLDRToSTR; 978 else if (ImmediateDefOpc == AArch64::ADDXri) 979 ++NumADDToSTRWithImm; 980 else 981 ++NumLDRToSTRWithImm; 982#endif // DEBUG 983 } 984 } 985 AArch64FI.addLOHDirective(Kind, Args); 986 } 987 988 // Now, we grabbed all the big patterns, check ADR opportunities. 989 for (const MachineInstr *Candidate : PotentialADROpportunities) 990 registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, 991 InvolvedInLOHs, RegToId); 992} 993 994/// Look for every register defined by potential LOHs candidates. 995/// Map these registers with dense id in @p RegToId and vice-versa in 996/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. 997static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, 998 MapIdToReg &IdToReg, 999 const TargetRegisterInfo *TRI) { 1000 unsigned CurRegId = 0; 1001 if (!PreCollectRegister) { 1002 unsigned NbReg = TRI->getNumRegs(); 1003 for (; CurRegId < NbReg; ++CurRegId) { 1004 RegToId[CurRegId] = CurRegId; 1005 DEBUG(IdToReg.push_back(CurRegId)); 1006 DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); 1007 } 1008 return; 1009 } 1010 1011 DEBUG(dbgs() << "** Collect Involved Register\n"); 1012 for (const auto &MBB : MF) { 1013 for (const MachineInstr &MI : MBB) { 1014 if (!canDefBePartOfLOH(&MI) && 1015 !isCandidateLoad(&MI) && !isCandidateStore(&MI)) 1016 continue; 1017 1018 // Process defs 1019 for (MachineInstr::const_mop_iterator IO = MI.operands_begin(), 1020 IOEnd = MI.operands_end(); 1021 IO != IOEnd; ++IO) { 1022 if (!IO->isReg() || !IO->isDef()) 1023 continue; 1024 unsigned CurReg = IO->getReg(); 1025 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) 1026 if (RegToId.find(*AI) == RegToId.end()) { 1027 DEBUG(IdToReg.push_back(*AI); 1028 assert(IdToReg[CurRegId] == *AI && 1029 "Reg index mismatches insertion index.")); 1030 RegToId[*AI] = CurRegId++; 1031 DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); 1032 } 1033 } 1034 } 1035 } 1036} 1037 1038bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { 1039 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1040 const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); 1041 1042 MapRegToId RegToId; 1043 MapIdToReg IdToReg; 1044 AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>(); 1045 assert(AArch64FI && "No MachineFunctionInfo for this function!"); 1046 1047 DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); 1048 1049 collectInvolvedReg(MF, RegToId, IdToReg, TRI); 1050 if (RegToId.empty()) 1051 return false; 1052 1053 MachineInstr *DummyOp = nullptr; 1054 if (BasicBlockScopeOnly) { 1055 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 1056 // For local analysis, create a dummy operation to record uses that are not 1057 // local. 1058 DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); 1059 } 1060 1061 unsigned NbReg = RegToId.size(); 1062 bool Modified = false; 1063 1064 // Start with ADRP. 1065 InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; 1066 1067 // Compute the reaching def in ADRP mode, meaning ADRP definitions 1068 // are first considered as uses. 1069 reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp); 1070 DEBUG(dbgs() << "ADRP reaching defs\n"); 1071 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); 1072 1073 // Translate the definition to uses map into a use to definitions map to ease 1074 // statistic computation. 1075 InstrToInstrs ADRPToReachingDefs; 1076 reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); 1077 1078 // Compute LOH for ADRP. 1079 computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); 1080 delete[] ColorOpToReachedUses; 1081 1082 // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. 1083 ColorOpToReachedUses = new InstrToInstrs[NbReg]; 1084 1085 // first perform a regular reaching def analysis. 1086 reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp); 1087 DEBUG(dbgs() << "All reaching defs\n"); 1088 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); 1089 1090 // Turn that into a use to defs to ease statistic computation. 1091 InstrToInstrs UsesToReachingDefs; 1092 reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); 1093 1094 // Compute other than AdrpAdrp LOH. 1095 computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, 1096 MDT); 1097 delete[] ColorOpToReachedUses; 1098 1099 if (BasicBlockScopeOnly) 1100 MF.DeleteMachineInstr(DummyOp); 1101 1102 return Modified; 1103} 1104 1105/// createAArch64CollectLOHPass - returns an instance of the Statistic for 1106/// linker optimization pass. 1107FunctionPass *llvm::createAArch64CollectLOHPass() { 1108 return new AArch64CollectLOH(); 1109} 1110