ARMConstantIslandPass.cpp revision 034de5f65f9139954cb01d94eaebbaefd294946e
1//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that splits the constant pool up into 'islands' 11// which are scattered through-out the function. This is required due to the 12// limited pc-relative displacements that ARM has. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "arm-cp-islands" 17#include "ARM.h" 18#include "ARMAddressingModes.h" 19#include "ARMMachineFunctionInfo.h" 20#include "ARMInstrInfo.h" 21#include "llvm/CodeGen/MachineConstantPool.h" 22#include "llvm/CodeGen/MachineFunctionPass.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineJumpTableInfo.h" 25#include "llvm/Target/TargetData.h" 26#include "llvm/Target/TargetMachine.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/Support/Debug.h" 29#include "llvm/Support/ErrorHandling.h" 30#include "llvm/Support/raw_ostream.h" 31#include "llvm/ADT/SmallVector.h" 32#include "llvm/ADT/STLExtras.h" 33#include "llvm/ADT/Statistic.h" 34using namespace llvm; 35 36STATISTIC(NumCPEs, "Number of constpool entries"); 37STATISTIC(NumSplit, "Number of uncond branches inserted"); 38STATISTIC(NumCBrFixed, "Number of cond branches fixed"); 39STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); 40STATISTIC(NumTBs, "Number of table branches generated"); 41STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); 42STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); 43 44namespace { 45 /// ARMConstantIslands - Due to limited PC-relative displacements, ARM 46 /// requires constant pool entries to be scattered among the instructions 47 /// inside a function. 
To do this, it completely ignores the normal LLVM 48 /// constant pool; instead, it places constants wherever it feels like with 49 /// special instructions. 50 /// 51 /// The terminology used in this pass includes: 52 /// Islands - Clumps of constants placed in the function. 53 /// Water - Potential places where an island could be formed. 54 /// CPE - A constant pool entry that has been placed somewhere, which 55 /// tracks a list of users. 56 class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass { 57 /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed 58 /// by MBB Number. The two-byte pads required for Thumb alignment are 59 /// counted as part of the following block (i.e., the offset and size for 60 /// a padded block will both be ==2 mod 4). 61 std::vector<unsigned> BBSizes; 62 63 /// BBOffsets - the offset of each MBB in bytes, starting from 0. 64 /// The two-byte pads required for Thumb alignment are counted as part of 65 /// the following block. 66 std::vector<unsigned> BBOffsets; 67 68 /// WaterList - A sorted list of basic blocks where islands could be placed 69 /// (i.e. blocks that don't fall through to the following block, due 70 /// to a return, unreachable, or unconditional branch). 71 std::vector<MachineBasicBlock*> WaterList; 72 73 typedef std::vector<MachineBasicBlock*>::iterator water_iterator; 74 75 /// CPUser - One user of a constant pool, keeping the machine instruction 76 /// pointer, the constant pool being referenced, and the max displacement 77 /// allowed from the instruction to the CP. 
78 struct CPUser { 79 MachineInstr *MI; 80 MachineInstr *CPEMI; 81 unsigned MaxDisp; 82 bool NegOk; 83 bool IsSoImm; 84 CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, 85 bool neg, bool soimm) 86 : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {} 87 }; 88 89 /// CPUsers - Keep track of all of the machine instructions that use various 90 /// constant pools and their max displacement. 91 std::vector<CPUser> CPUsers; 92 93 /// CPEntry - One per constant pool entry, keeping the machine instruction 94 /// pointer, the constpool index, and the number of CPUser's which 95 /// reference this entry. 96 struct CPEntry { 97 MachineInstr *CPEMI; 98 unsigned CPI; 99 unsigned RefCount; 100 CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) 101 : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} 102 }; 103 104 /// CPEntries - Keep track of all of the constant pool entry machine 105 /// instructions. For each original constpool index (i.e. those that 106 /// existed upon entry to this pass), it keeps a vector of entries. 107 /// Original elements are cloned as we go along; the clones are 108 /// put in the vector of the original element, but have distinct CPIs. 109 std::vector<std::vector<CPEntry> > CPEntries; 110 111 /// ImmBranch - One per immediate branch, keeping the machine instruction 112 /// pointer, conditional or unconditional, the max displacement, 113 /// and (if isCond is true) the corresponding unconditional branch 114 /// opcode. 115 struct ImmBranch { 116 MachineInstr *MI; 117 unsigned MaxDisp : 31; 118 bool isCond : 1; 119 int UncondBr; 120 ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr) 121 : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} 122 }; 123 124 /// ImmBranches - Keep track of all the immediate branch instructions. 125 /// 126 std::vector<ImmBranch> ImmBranches; 127 128 /// PushPopMIs - Keep track of all the Thumb push / pop instructions. 
129 /// 130 SmallVector<MachineInstr*, 4> PushPopMIs; 131 132 /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions. 133 SmallVector<MachineInstr*, 4> T2JumpTables; 134 135 /// HasFarJump - True if any far jump instruction has been emitted during 136 /// the branch fix up pass. 137 bool HasFarJump; 138 139 const TargetInstrInfo *TII; 140 const ARMSubtarget *STI; 141 ARMFunctionInfo *AFI; 142 bool isThumb; 143 bool isThumb1; 144 bool isThumb2; 145 public: 146 static char ID; 147 ARMConstantIslands() : MachineFunctionPass(&ID) {} 148 149 virtual bool runOnMachineFunction(MachineFunction &MF); 150 151 virtual const char *getPassName() const { 152 return "ARM constant island placement and branch shortening pass"; 153 } 154 155 private: 156 void DoInitialPlacement(MachineFunction &MF, 157 std::vector<MachineInstr*> &CPEMIs); 158 CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); 159 void InitialFunctionScan(MachineFunction &MF, 160 const std::vector<MachineInstr*> &CPEMIs); 161 MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); 162 void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); 163 void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); 164 bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); 165 int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); 166 bool LookForWater(CPUser&U, unsigned UserOffset, 167 MachineBasicBlock** NewMBB); 168 MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB, 169 water_iterator IP); 170 void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, 171 MachineBasicBlock** NewMBB); 172 bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); 173 void RemoveDeadCPEMI(MachineInstr *CPEMI); 174 bool RemoveUnusedCPEntries(); 175 bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, 176 MachineInstr *CPEMI, unsigned Disp, bool NegOk, 177 bool DoDump = false); 178 bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, 179 CPUser &U); 180 
bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, 181 unsigned Disp, bool NegativeOK, bool IsSoImm = false); 182 bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); 183 bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br); 184 bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); 185 bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); 186 bool UndoLRSpillRestore(); 187 bool OptimizeThumb2Instructions(MachineFunction &MF); 188 bool OptimizeThumb2Branches(MachineFunction &MF); 189 bool OptimizeThumb2JumpTables(MachineFunction &MF); 190 191 unsigned GetOffsetOf(MachineInstr *MI) const; 192 void dumpBBs(); 193 void verify(MachineFunction &MF); 194 }; 195 char ARMConstantIslands::ID = 0; 196} 197 198/// verify - check BBOffsets, BBSizes, alignment of islands 199void ARMConstantIslands::verify(MachineFunction &MF) { 200 assert(BBOffsets.size() == BBSizes.size()); 201 for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) 202 assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); 203 if (!isThumb) 204 return; 205#ifndef NDEBUG 206 for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); 207 MBBI != E; ++MBBI) { 208 MachineBasicBlock *MBB = MBBI; 209 if (!MBB->empty() && 210 MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { 211 unsigned MBBId = MBB->getNumber(); 212 assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || 213 (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); 214 } 215 } 216#endif 217} 218 219/// print block size and offset information - debugging 220void ARMConstantIslands::dumpBBs() { 221 for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { 222 DEBUG(errs() << "block " << J << " offset " << BBOffsets[J] 223 << " size " << BBSizes[J] << "\n"); 224 } 225} 226 227/// createARMConstantIslandPass - returns an instance of the constpool 228/// island pass. 
229FunctionPass *llvm::createARMConstantIslandPass() { 230 return new ARMConstantIslands(); 231} 232 233bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { 234 MachineConstantPool &MCP = *MF.getConstantPool(); 235 236 TII = MF.getTarget().getInstrInfo(); 237 AFI = MF.getInfo<ARMFunctionInfo>(); 238 STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); 239 240 isThumb = AFI->isThumbFunction(); 241 isThumb1 = AFI->isThumb1OnlyFunction(); 242 isThumb2 = AFI->isThumb2Function(); 243 244 HasFarJump = false; 245 246 // Renumber all of the machine basic blocks in the function, guaranteeing that 247 // the numbers agree with the position of the block in the function. 248 MF.RenumberBlocks(); 249 250 // Thumb1 functions containing constant pools get 4-byte alignment. 251 // This is so we can keep exact track of where the alignment padding goes. 252 253 // Set default. Thumb1 function is 2-byte aligned, ARM and Thumb2 are 4-byte 254 // aligned. 255 AFI->setAlign(isThumb1 ? 1U : 2U); 256 257 // Perform the initial placement of the constant pool entries. To start with, 258 // we put them all at the end of the function. 259 std::vector<MachineInstr*> CPEMIs; 260 if (!MCP.isEmpty()) { 261 DoInitialPlacement(MF, CPEMIs); 262 if (isThumb1) 263 AFI->setAlign(2U); 264 } 265 266 /// The next UID to take is the first unused one. 267 AFI->initConstPoolEntryUId(CPEMIs.size()); 268 269 // Do the initial scan of the function, building up information about the 270 // sizes of each block, the location of all the water, and finding all of the 271 // constant pool users. 272 InitialFunctionScan(MF, CPEMIs); 273 CPEMIs.clear(); 274 275 /// Remove dead constant pool entries. 276 RemoveUnusedCPEntries(); 277 278 // Iteratively place constant pool entries and fix up branches until there 279 // is no change. 
280 bool MadeChange = false; 281 unsigned NoCPIters = 0, NoBRIters = 0; 282 while (true) { 283 bool CPChange = false; 284 for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) 285 CPChange |= HandleConstantPoolUser(MF, i); 286 if (CPChange && ++NoCPIters > 30) 287 llvm_unreachable("Constant Island pass failed to converge!"); 288 DEBUG(dumpBBs()); 289 290 bool BRChange = false; 291 for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) 292 BRChange |= FixUpImmediateBr(MF, ImmBranches[i]); 293 if (BRChange && ++NoBRIters > 30) 294 llvm_unreachable("Branch Fix Up pass failed to converge!"); 295 DEBUG(dumpBBs()); 296 297 if (!CPChange && !BRChange) 298 break; 299 MadeChange = true; 300 } 301 302 // Shrink 32-bit Thumb2 branch, load, and store instructions. 303 if (isThumb2) 304 MadeChange |= OptimizeThumb2Instructions(MF); 305 306 // After a while, this might be made debug-only, but it is not expensive. 307 verify(MF); 308 309 // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. 310 // Undo the spill / restore of LR if possible. 311 if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) 312 MadeChange |= UndoLRSpillRestore(); 313 314 BBSizes.clear(); 315 BBOffsets.clear(); 316 WaterList.clear(); 317 CPUsers.clear(); 318 CPEntries.clear(); 319 ImmBranches.clear(); 320 PushPopMIs.clear(); 321 T2JumpTables.clear(); 322 323 return MadeChange; 324} 325 326/// DoInitialPlacement - Perform the initial placement of the constant pool 327/// entries. To start with, we put them all at the end of the function. 328void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, 329 std::vector<MachineInstr*> &CPEMIs) { 330 // Create the basic block to hold the CPE's. 331 MachineBasicBlock *BB = MF.CreateMachineBasicBlock(); 332 MF.push_back(BB); 333 334 // Add all of the constants from the constant pool to the end block, use an 335 // identity mapping of CPI's to CPE's. 
336 const std::vector<MachineConstantPoolEntry> &CPs = 337 MF.getConstantPool()->getConstants(); 338 339 const TargetData &TD = *MF.getTarget().getTargetData(); 340 for (unsigned i = 0, e = CPs.size(); i != e; ++i) { 341 unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); 342 // Verify that all constant pool entries are a multiple of 4 bytes. If not, 343 // we would have to pad them out or something so that instructions stay 344 // aligned. 345 assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!"); 346 MachineInstr *CPEMI = 347 BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) 348 .addImm(i).addConstantPoolIndex(i).addImm(Size); 349 CPEMIs.push_back(CPEMI); 350 351 // Add a new CPEntry, but no corresponding CPUser yet. 352 std::vector<CPEntry> CPEs; 353 CPEs.push_back(CPEntry(CPEMI, i)); 354 CPEntries.push_back(CPEs); 355 NumCPEs++; 356 DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i 357 << "\n"); 358 } 359} 360 361/// BBHasFallthrough - Return true if the specified basic block can fallthrough 362/// into the block immediately after it. 363static bool BBHasFallthrough(MachineBasicBlock *MBB) { 364 // Get the next machine basic block in the function. 365 MachineFunction::iterator MBBI = MBB; 366 if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. 367 return false; 368 369 MachineBasicBlock *NextBB = next(MBBI); 370 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 371 E = MBB->succ_end(); I != E; ++I) 372 if (*I == NextBB) 373 return true; 374 375 return false; 376} 377 378/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, 379/// look up the corresponding CPEntry. 380ARMConstantIslands::CPEntry 381*ARMConstantIslands::findConstPoolEntry(unsigned CPI, 382 const MachineInstr *CPEMI) { 383 std::vector<CPEntry> &CPEs = CPEntries[CPI]; 384 // Number of entries per constpool index should be small, just do a 385 // linear search. 
386 for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { 387 if (CPEs[i].CPEMI == CPEMI) 388 return &CPEs[i]; 389 } 390 return NULL; 391} 392 393/// InitialFunctionScan - Do the initial scan of the function, building up 394/// information about the sizes of each block, the location of all the water, 395/// and finding all of the constant pool users. 396void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, 397 const std::vector<MachineInstr*> &CPEMIs) { 398 unsigned Offset = 0; 399 for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); 400 MBBI != E; ++MBBI) { 401 MachineBasicBlock &MBB = *MBBI; 402 403 // If this block doesn't fall through into the next MBB, then this is 404 // 'water' that a constant pool island could be placed. 405 if (!BBHasFallthrough(&MBB)) 406 WaterList.push_back(&MBB); 407 408 unsigned MBBSize = 0; 409 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 410 I != E; ++I) { 411 // Add instruction size to MBBSize. 412 MBBSize += TII->GetInstSizeInBytes(I); 413 414 int Opc = I->getOpcode(); 415 if (I->getDesc().isBranch()) { 416 bool isCond = false; 417 unsigned Bits = 0; 418 unsigned Scale = 1; 419 int UOpc = Opc; 420 switch (Opc) { 421 default: 422 continue; // Ignore other JT branches 423 case ARM::tBR_JTr: 424 // A Thumb1 table jump may involve padding; for the offsets to 425 // be right, functions containing these must be 4-byte aligned. 426 AFI->setAlign(2U); 427 if ((Offset+MBBSize)%4 != 0) 428 // FIXME: Add a pseudo ALIGN instruction instead. 
429 MBBSize += 2; // padding 430 continue; // Does not get an entry in ImmBranches 431 case ARM::t2BR_JT: 432 T2JumpTables.push_back(I); 433 continue; // Does not get an entry in ImmBranches 434 case ARM::Bcc: 435 isCond = true; 436 UOpc = ARM::B; 437 // Fallthrough 438 case ARM::B: 439 Bits = 24; 440 Scale = 4; 441 break; 442 case ARM::tBcc: 443 isCond = true; 444 UOpc = ARM::tB; 445 Bits = 8; 446 Scale = 2; 447 break; 448 case ARM::tB: 449 Bits = 11; 450 Scale = 2; 451 break; 452 case ARM::t2Bcc: 453 isCond = true; 454 UOpc = ARM::t2B; 455 Bits = 20; 456 Scale = 2; 457 break; 458 case ARM::t2B: 459 Bits = 24; 460 Scale = 2; 461 break; 462 } 463 464 // Record this immediate branch. 465 unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; 466 ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc)); 467 } 468 469 if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) 470 PushPopMIs.push_back(I); 471 472 if (Opc == ARM::CONSTPOOL_ENTRY) 473 continue; 474 475 // Scan the instructions for constant pool operands. 476 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) 477 if (I->getOperand(op).isCPI()) { 478 // We found one. The addressing mode tells us the max displacement 479 // from the PC that this instruction permits. 480 481 // Basic size info comes from the TSFlags field. 482 unsigned Bits = 0; 483 unsigned Scale = 1; 484 bool NegOk = false; 485 bool IsSoImm = false; 486 487 switch (Opc) { 488 default: 489 llvm_unreachable("Unknown addressing mode for CP reference!"); 490 break; 491 492 // Taking the address of a CP entry. 493 case ARM::LEApcrel: 494 // This takes a SoImm, which is 8 bit immediate rotated. We'll 495 // pretend the maximum offset is 255 * 4. Since each instruction 496 // 4 byte wide, this is always correct. We'llc heck for other 497 // displacements that fits in a SoImm as well. 
498 Bits = 8; 499 Scale = 4; 500 NegOk = true; 501 IsSoImm = true; 502 break; 503 case ARM::t2LEApcrel: 504 Bits = 12; 505 NegOk = true; 506 break; 507 case ARM::tLEApcrel: 508 Bits = 8; 509 Scale = 4; 510 break; 511 512 case ARM::LDR: 513 case ARM::LDRcp: 514 case ARM::t2LDRpci: 515 Bits = 12; // +-offset_12 516 NegOk = true; 517 break; 518 519 case ARM::tLDRpci: 520 case ARM::tLDRcp: 521 Bits = 8; 522 Scale = 4; // +(offset_8*4) 523 break; 524 525 case ARM::FLDD: 526 case ARM::FLDS: 527 Bits = 8; 528 Scale = 4; // +-(offset_8*4) 529 NegOk = true; 530 break; 531 } 532 533 // Remember that this is a user of a CP entry. 534 unsigned CPI = I->getOperand(op).getIndex(); 535 MachineInstr *CPEMI = CPEMIs[CPI]; 536 unsigned MaxOffs = ((1 << Bits)-1) * Scale; 537 CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); 538 539 // Increment corresponding CPEntry reference count. 540 CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); 541 assert(CPE && "Cannot find a corresponding CPEntry!"); 542 CPE->RefCount++; 543 544 // Instructions can only use one CP entry, don't bother scanning the 545 // rest of the operands. 546 break; 547 } 548 } 549 550 // In thumb mode, if this block is a constpool island, we may need padding 551 // so it's aligned on 4 byte boundary. 552 if (isThumb && 553 !MBB.empty() && 554 MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && 555 (Offset%4) != 0) 556 MBBSize += 2; 557 558 BBSizes.push_back(MBBSize); 559 BBOffsets.push_back(Offset); 560 Offset += MBBSize; 561 } 562} 563 564/// GetOffsetOf - Return the current offset of the specified machine instruction 565/// from the start of the function. This offset changes as stuff is moved 566/// around inside the function. 
567unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { 568 MachineBasicBlock *MBB = MI->getParent(); 569 570 // The offset is composed of two things: the sum of the sizes of all MBB's 571 // before this instruction's block, and the offset from the start of the block 572 // it is in. 573 unsigned Offset = BBOffsets[MBB->getNumber()]; 574 575 // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has 576 // alignment padding, and compensate if so. 577 if (isThumb && 578 MI->getOpcode() == ARM::CONSTPOOL_ENTRY && 579 Offset%4 != 0) 580 Offset += 2; 581 582 // Sum instructions before MI in MBB. 583 for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { 584 assert(I != MBB->end() && "Didn't find MI in its own basic block?"); 585 if (&*I == MI) return Offset; 586 Offset += TII->GetInstSizeInBytes(I); 587 } 588} 589 590/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB 591/// ID. 592static bool CompareMBBNumbers(const MachineBasicBlock *LHS, 593 const MachineBasicBlock *RHS) { 594 return LHS->getNumber() < RHS->getNumber(); 595} 596 597/// UpdateForInsertedWaterBlock - When a block is newly inserted into the 598/// machine function, it upsets all of the block numbers. Renumber the blocks 599/// and update the arrays that parallel this numbering. 600void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { 601 // Renumber the MBB's to keep them consequtive. 602 NewBB->getParent()->RenumberBlocks(NewBB); 603 604 // Insert a size into BBSizes to align it properly with the (newly 605 // renumbered) block numbers. 606 BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); 607 608 // Likewise for BBOffsets. 609 BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); 610 611 // Next, update WaterList. Specifically, we need to add NewMBB as having 612 // available water after it. 
613 water_iterator IP = 614 std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, 615 CompareMBBNumbers); 616 WaterList.insert(IP, NewBB); 617} 618 619 620/// Split the basic block containing MI into two blocks, which are joined by 621/// an unconditional branch. Update datastructures and renumber blocks to 622/// account for this change and returns the newly created block. 623MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { 624 MachineBasicBlock *OrigBB = MI->getParent(); 625 MachineFunction &MF = *OrigBB->getParent(); 626 627 // Create a new MBB for the code after the OrigBB. 628 MachineBasicBlock *NewBB = 629 MF.CreateMachineBasicBlock(OrigBB->getBasicBlock()); 630 MachineFunction::iterator MBBI = OrigBB; ++MBBI; 631 MF.insert(MBBI, NewBB); 632 633 // Splice the instructions starting with MI over to NewBB. 634 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); 635 636 // Add an unconditional branch from OrigBB to NewBB. 637 // Note the new unconditional branch is not being recorded. 638 // There doesn't seem to be meaningful DebugInfo available; this doesn't 639 // correspond to anything in the source. 640 unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; 641 BuildMI(OrigBB, DebugLoc::getUnknownLoc(), TII->get(Opc)).addMBB(NewBB); 642 NumSplit++; 643 644 // Update the CFG. All succs of OrigBB are now succs of NewBB. 645 while (!OrigBB->succ_empty()) { 646 MachineBasicBlock *Succ = *OrigBB->succ_begin(); 647 OrigBB->removeSuccessor(Succ); 648 NewBB->addSuccessor(Succ); 649 650 // This pass should be run after register allocation, so there should be no 651 // PHI nodes to update. 652 assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI) 653 && "PHI nodes should be eliminated by now!"); 654 } 655 656 // OrigBB branches to NewBB. 657 OrigBB->addSuccessor(NewBB); 658 659 // Update internal data structures to account for the newly inserted MBB. 
660 // This is almost the same as UpdateForInsertedWaterBlock, except that 661 // the Water goes after OrigBB, not NewBB. 662 MF.RenumberBlocks(NewBB); 663 664 // Insert a size into BBSizes to align it properly with the (newly 665 // renumbered) block numbers. 666 BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); 667 668 // Likewise for BBOffsets. 669 BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); 670 671 // Next, update WaterList. Specifically, we need to add OrigMBB as having 672 // available water after it (but not if it's already there, which happens 673 // when splitting before a conditional branch that is followed by an 674 // unconditional branch - in that case we want to insert NewBB). 675 water_iterator IP = 676 std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, 677 CompareMBBNumbers); 678 MachineBasicBlock* WaterBB = *IP; 679 if (WaterBB == OrigBB) 680 WaterList.insert(next(IP), NewBB); 681 else 682 WaterList.insert(IP, OrigBB); 683 684 // Figure out how large the first NewMBB is. (It cannot 685 // contain a constpool_entry or tablejump.) 686 unsigned NewBBSize = 0; 687 for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); 688 I != E; ++I) 689 NewBBSize += TII->GetInstSizeInBytes(I); 690 691 unsigned OrigBBI = OrigBB->getNumber(); 692 unsigned NewBBI = NewBB->getNumber(); 693 // Set the size of NewBB in BBSizes. 694 BBSizes[NewBBI] = NewBBSize; 695 696 // We removed instructions from UserMBB, subtract that off from its size. 697 // Add 2 or 4 to the block to count the unconditional branch we added to it. 698 int delta = isThumb1 ? 2 : 4; 699 BBSizes[OrigBBI] -= NewBBSize - delta; 700 701 // ...and adjust BBOffsets for NewBB accordingly. 702 BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; 703 704 // All BBOffsets following these blocks must be modified. 
705 AdjustBBOffsetsAfter(NewBB, delta); 706 707 return NewBB; 708} 709 710/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool 711/// reference) is within MaxDisp of TrialOffset (a proposed location of a 712/// constant pool entry). 713bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, 714 unsigned TrialOffset, unsigned MaxDisp, 715 bool NegativeOK, bool IsSoImm) { 716 // On Thumb offsets==2 mod 4 are rounded down by the hardware for 717 // purposes of the displacement computation; compensate for that here. 718 // Effectively, the valid range of displacements is 2 bytes smaller for such 719 // references. 720 unsigned TotalAdj = 0; 721 if (isThumb && UserOffset%4 !=0) { 722 UserOffset -= 2; 723 TotalAdj = 2; 724 } 725 // CPEs will be rounded up to a multiple of 4. 726 if (isThumb && TrialOffset%4 != 0) { 727 TrialOffset += 2; 728 TotalAdj += 2; 729 } 730 731 // In Thumb2 mode, later branch adjustments can shift instructions up and 732 // cause alignment change. In the worst case scenario this can cause the 733 // user's effective address to be subtracted by 2 and the CPE's address to 734 // be plus 2. 735 if (isThumb2 && TotalAdj != 4) 736 MaxDisp -= (4 - TotalAdj); 737 738 if (UserOffset <= TrialOffset) { 739 // User before the Trial. 740 if (TrialOffset - UserOffset <= MaxDisp) 741 return true; 742 // FIXME: Make use full range of soimm values. 743 } else if (NegativeOK) { 744 if (UserOffset - TrialOffset <= MaxDisp) 745 return true; 746 // FIXME: Make use full range of soimm values. 747 } 748 return false; 749} 750 751/// WaterIsInRange - Returns true if a CPE placed after the specified 752/// Water (a basic block) will be in range for the specific MI. 
753 754bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, 755 MachineBasicBlock* Water, CPUser &U) { 756 unsigned MaxDisp = U.MaxDisp; 757 unsigned CPEOffset = BBOffsets[Water->getNumber()] + 758 BBSizes[Water->getNumber()]; 759 760 // If the CPE is to be inserted before the instruction, that will raise 761 // the offset of the instruction. (Currently applies only to ARM, so 762 // no alignment compensation attempted here.) 763 if (CPEOffset < UserOffset) 764 UserOffset += U.CPEMI->getOperand(2).getImm(); 765 766 return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm); 767} 768 769/// CPEIsInRange - Returns true if the distance between specific MI and 770/// specific ConstPool entry instruction can fit in MI's displacement field. 771bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, 772 MachineInstr *CPEMI, unsigned MaxDisp, 773 bool NegOk, bool DoDump) { 774 unsigned CPEOffset = GetOffsetOf(CPEMI); 775 assert(CPEOffset%4 == 0 && "Misaligned CPE"); 776 777 if (DoDump) { 778 DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() 779 << " max delta=" << MaxDisp 780 << " insn address=" << UserOffset 781 << " CPE address=" << CPEOffset 782 << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI); 783 } 784 785 return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); 786} 787 788#ifndef NDEBUG 789/// BBIsJumpedOver - Return true of the specified basic block's only predecessor 790/// unconditionally branches to its only successor. 
static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
  // Only a block with exactly one predecessor and one successor can be
  // "jumped over" in the sense checked here.
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    return false;

  MachineBasicBlock *Succ = *MBB->succ_begin();
  MachineBasicBlock *Pred = *MBB->pred_begin();
  MachineInstr *PredMI = &Pred->back();
  // The predecessor jumps over MBB iff it ends with an unconditional branch
  // (ARM, Thumb1, or Thumb2 form) straight to MBB's successor.
  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
      || PredMI->getOpcode() == ARM::t2B)
    return PredMI->getOperand(0).getMBB() == Succ;
  return false;
}
#endif // NDEBUG

/// AdjustBBOffsetsAfter - Add delta to the offset of every basic block that
/// follows BB.  In Thumb mode also re-derive the 4-byte alignment padding of
/// constant pool islands and Thumb1 jump tables as their offsets shift,
/// folding any padding change into delta; if delta becomes 0 the remaining
/// blocks are unaffected and the walk stops early.
void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
                                              int delta) {
  MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
  for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
      i < e; ++i) {
    BBOffsets[i] += delta;
    // If some existing blocks have padding, adjust the padding as needed, a
    // bit tricky.  delta can be negative so don't use % on that.
    if (!isThumb)
      continue;
    MachineBasicBlock *MBB = MBBI;
    if (!MBB->empty()) {
      // Constant pool entries require padding.
      if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
        // Compare the block's alignment before and after the shift;
        // OldOffset is recovered by undoing delta.
        unsigned OldOffset = BBOffsets[i] - delta;
        if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
          // add new padding
          BBSizes[i] += 2;
          delta += 2;
        } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
          // remove existing padding
          BBSizes[i] -= 2;
          delta -= 2;
        }
      }
      // Thumb1 jump tables require padding.  They should be at the end;
      // following unconditional branches are removed by AnalyzeBranch.
      MachineInstr *ThumbJTMI = prior(MBB->end());
      if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
        unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
        unsigned OldMIOffset = NewMIOffset - delta;
        if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
          // remove existing padding
          BBSizes[i] -= 2;
          delta -= 2;
        } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
          // add new padding
          BBSizes[i] += 2;
          delta += 2;
        }
      }
      // Once padding changes cancel the shift, later blocks keep their
      // offsets, so we can stop.
      if (delta==0)
        return;
    }
    MBBI = next(MBBI);
  }
}

/// DecrementOldEntry - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount.  If the refcount
/// becomes 0 remove the entry and instruction.  Returns true if we removed
/// the entry, false if we didn't.
bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
  // Find the old entry. Eliminate it if it is no longer used.
  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
  assert(CPE && "Unexpected!");
  if (--CPE->RefCount == 0) {
    RemoveDeadCPEMI(CPEMI);
    // NULL marks a removed entry; scans over CPEntries skip these.
    CPE->CPEMI = NULL;
    NumCPEs--;
    return true;
  }
  return false;
}

/// LookForExistingCPEntry - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE exists, and if so, change
/// the data structures so the user references the clone.  Returns:
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
{
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI = U.CPEMI;

  // Check to see if the CPE is already in-range.
  if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
    DEBUG(errs() << "In range\n");
    return 1;
  }

  // No.  Look for previously created clones of the CPE that are in range.
  unsigned CPI = CPEMI->getOperand(1).getIndex();
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
    // We already tried this one
    if (CPEs[i].CPEMI == CPEMI)
      continue;
    // Removing CPEs can leave empty entries, skip
    if (CPEs[i].CPEMI == NULL)
      continue;
    if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
      DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
                   << CPEs[i].CPI << "\n");
      // Point the CPUser node to the replacement
      U.CPEMI = CPEs[i].CPEMI;
      // Change the CPI in the instruction operand to refer to the clone.
      for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
        if (UserMI->getOperand(j).isCPI()) {
          UserMI->getOperand(j).setIndex(CPEs[i].CPI);
          break;
        }
      // Adjust the refcount of the clone...
      CPEs[i].RefCount++;
      // ...and the original.  If we didn't remove the old entry, none of the
      // addresses changed, so we don't need another pass.
      return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
    }
  }
  return 0;
}

/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
/// the specific unconditional branch instruction.
static inline unsigned getUnconditionalBrDisp(int Opc) {
  switch (Opc) {
  case ARM::tB:
    // Thumb1 B: signed 11-bit field, halfword scaled.
    return ((1<<10)-1)*2;
  case ARM::t2B:
    // Thumb2 B: signed 24-bit field, halfword scaled.
    return ((1<<23)-1)*2;
  default:
    break;
  }

  // ARM B: signed 24-bit field, word scaled.
  return ((1<<23)-1)*4;
}

/// AcceptWater - Small amount of common code factored out of the following.
MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
                                                   water_iterator IP) {
  DEBUG(errs() << "found water in range\n");
  // Remove the original WaterList entry; we want subsequent
  // insertions in this vicinity to go after the one we're
  // about to insert.  This considerably reduces the number
  // of times we have to move the same CPE more than once.
  WaterList.erase(IP);
  // CPE goes before following block (NewMBB).
  return next(MachineFunction::iterator(WaterBB));
}

/// LookForWater - look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not.  If it returns true, *NewMBB
/// is set to the WaterList entry.
/// For ARM, we prefer the water that's farthest away.  For Thumb, prefer
/// water that will not introduce padding to water that will; within each
/// group, prefer the water that's farthest away.
bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
                                      MachineBasicBlock** NewMBB) {
  // IPThatWouldPad is only read when WaterBBThatWouldPad is non-NULL, so it
  // is never used uninitialized.
  water_iterator IPThatWouldPad;
  MachineBasicBlock* WaterBBThatWouldPad = NULL;
  if (!WaterList.empty()) {
    // Walk the water list backwards (farthest entries first).
    for (water_iterator IP = prior(WaterList.end()),
           B = WaterList.begin();; --IP) {
      MachineBasicBlock* WaterBB = *IP;
      if (WaterIsInRange(UserOffset, WaterBB, U)) {
        unsigned WBBId = WaterBB->getNumber();
        if (isThumb &&
            (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
          // This is valid Water, but would introduce padding.  Remember
          // it in case we don't find any Water that doesn't do this.
          if (!WaterBBThatWouldPad) {
            WaterBBThatWouldPad = WaterBB;
            IPThatWouldPad = IP;
          }
        } else {
          *NewMBB = AcceptWater(WaterBB, IP);
          return true;
        }
      }
      if (IP == B)
        break;
    }
  }
  // Fall back to padding-introducing water if that's all we found.
  if (isThumb && WaterBBThatWouldPad) {
    *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
    return true;
  }
  return false;
}

/// CreateNewWater - No existing WaterList entry will work for
/// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct.  Otherwise the block is split to create a hole with an
/// unconditional branch around it.  In either case *NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).
void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
                                        unsigned UserOffset,
                                        MachineBasicBlock** NewMBB) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI = U.CPEMI;
  MachineBasicBlock *UserMBB = UserMI->getParent();
  unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
                               BBSizes[UserMBB->getNumber()];
  assert(OffsetOfNextBlock == BBOffsets[UserMBB->getNumber()+1]);

  // If the use is at the end of the block, or the end of the block
  // is within range, make new water there.  (The addition below is
  // for the unconditional branch we will be adding: 4 bytes on ARM + Thumb2,
  // 2 on Thumb1.  Possible Thumb1 alignment padding is allowed for
  // inside OffsetIsInRange.
  // If the block ends in an unconditional branch already, it is water,
  // and is known to be out of range, so we'll always be adding a branch.)
  if (&UserMBB->back() == UserMI ||
      OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
                      U.MaxDisp, U.NegOk, U.IsSoImm)) {
    DEBUG(errs() << "Split at end of block\n");
    if (&UserMBB->back() == UserMI)
      assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
    *NewMBB = next(MachineFunction::iterator(UserMBB));
    // Add an unconditional branch from UserMBB to fallthrough block.
    // Record it for branch lengthening; this new branch will not get out of
    // range, but if the preceding conditional branch is out of range, the
    // targets will be exchanged, and the altered branch may be out of
    // range, so the machinery has to know about it.
    int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
    BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
            TII->get(UncondBr)).addMBB(*NewMBB);
    unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
    ImmBranches.push_back(ImmBranch(&UserMBB->back(),
                                    MaxDisp, false, UncondBr));
    int delta = isThumb1 ? 2 : 4;
    BBSizes[UserMBB->getNumber()] += delta;
    AdjustBBOffsetsAfter(UserMBB, delta);
  } else {
    // What a big block.  Find a place within the block to split it.
    // This is a little tricky on Thumb1 since instructions are 2 bytes
    // and constant pool entries are 4 bytes: if instruction I references
    // island CPE, and instruction I+1 references CPE', it will
    // not work well to put CPE as far forward as possible, since then
    // CPE' cannot immediately follow it (that location is 2 bytes
    // farther away from I+1 than CPE was from I) and we'd need to create
    // a new island.  So, we make a first guess, then walk through the
    // instructions between the one currently being looked at and the
    // possible insertion point, and make sure any other instructions
    // that reference CPEs will be able to use the same island area;
    // if not, we back up the insertion point.

    // The 4 in the following is for the unconditional branch we'll be
    // inserting (allows for long branch on Thumb1).  Alignment of the
    // island is handled inside OffsetIsInRange.
    unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
    // This could point off the end of the block if we've already got
    // constant pool entries following this block; only the last one is
    // in the water list.  Back past any possible branches (allow for a
    // conditional and a maximally long unconditional).
    if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
      BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
                         (isThumb1 ? 6 : 8);
    unsigned EndInsertOffset = BaseInsertOffset +
                               CPEMI->getOperand(2).getImm();
    MachineBasicBlock::iterator MI = UserMI;
    ++MI;
    unsigned CPUIndex = CPUserIndex+1;
    // CPUsers is ordered, so subsequent users in this block appear at
    // increasing indices; check each against the proposed island location.
    for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
         Offset < BaseInsertOffset;
         Offset += TII->GetInstSizeInBytes(MI),
           MI = next(MI)) {
      if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
        CPUser &U = CPUsers[CPUIndex];
        if (!OffsetIsInRange(Offset, EndInsertOffset,
                             U.MaxDisp, U.NegOk, U.IsSoImm)) {
          BaseInsertOffset -= (isThumb1 ? 2 : 4);
          EndInsertOffset -= (isThumb1 ? 2 : 4);
        }
        // This is overly conservative, as we don't account for CPEMIs
        // being reused within the block, but it doesn't matter much.
        EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
        CPUIndex++;
      }
    }
    DEBUG(errs() << "Split in middle of big block\n");
    *NewMBB = SplitBlockBeforeInstr(prior(MI));
  }
}

/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range.  If so, pick up the constant pool value and move it some
/// place in-range.  Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
                                                unsigned CPUserIndex) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI = U.CPEMI;
  unsigned CPI = CPEMI->getOperand(1).getIndex();
  unsigned Size = CPEMI->getOperand(2).getImm();
  MachineBasicBlock *NewMBB;
  // Compute this only once, it's expensive.  The 4 or 8 is the value the
  // hardware keeps in the PC.
  unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);

  // See if the current entry is within range, or there is a clone of it
  // in range.
  int result = LookForExistingCPEntry(U, UserOffset);
  if (result==1) return false;
  else if (result==2) return true;

  // No existing clone of this CPE is within range.
  // We will be generating a new clone.  Get a UID for it.
  unsigned ID = AFI->createConstPoolEntryUId();

  // Look for water where we can place this CPE.  We look for the farthest one
  // away that will work.  Forward references only for now (although later
  // we might find some that are backwards).

  if (!LookForWater(U, UserOffset, &NewMBB)) {
    // No water found.
    DEBUG(errs() << "No water found\n");
    CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
  }

  // Okay, we know we can put an island before NewMBB now, do it!
  MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
  MF.insert(NewMBB, NewIsland);

  // Update internal data structures to account for the newly inserted MBB.
  UpdateForInsertedWaterBlock(NewIsland);

  // Decrement the old entry, and remove it if refcount becomes 0.
  DecrementOldEntry(CPI, CPEMI);

  // Now that we have an island to add the CPE to, clone the original CPE and
  // add it to the island.
  U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
                    TII->get(ARM::CONSTPOOL_ENTRY))
                .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
  NumCPEs++;

  BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
  // Compensate for .align 2 in thumb mode.
  if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
    Size += 2;
  // Increase the size of the island block to account for the new entry.
  BBSizes[NewIsland->getNumber()] += Size;
  AdjustBBOffsetsAfter(NewIsland, Size);

  // Finally, change the CPI in the instruction operand to be ID.
  for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
    if (UserMI->getOperand(i).isCPI()) {
      UserMI->getOperand(i).setIndex(ID);
      break;
    }

  DEBUG(errs() << "  Moved CPE to #" << ID << " CPI=" << CPI
               << '\t' << *UserMI);

  return true;
}

/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction.  Update
/// sizes and offsets of impacted basic blocks.
void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
  MachineBasicBlock *CPEBB = CPEMI->getParent();
  unsigned Size = CPEMI->getOperand(2).getImm();
  CPEMI->eraseFromParent();
  BBSizes[CPEBB->getNumber()] -= Size;
  // All succeeding offsets have the current size value added in, fix this.
  if (CPEBB->empty()) {
    // In thumb1 mode, the size of island may be padded by two to compensate
    // for the alignment requirement.  Then it will now be 2 when the block is
    // empty, so fold that residual padding into the size adjustment as well.
    if (BBSizes[CPEBB->getNumber()] != 0) {
      Size += BBSizes[CPEBB->getNumber()];
      BBSizes[CPEBB->getNumber()] = 0;
    }
  }
  AdjustBBOffsetsAfter(CPEBB, -Size);
  // An island has only one predecessor BB and one successor BB.  Check if
  // this BB's predecessor jumps directly to this BB's successor.  This
  // shouldn't happen currently.
  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
  // FIXME: remove the empty blocks after all the work is done?
}

/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
/// are zero.
1190bool ARMConstantIslands::RemoveUnusedCPEntries() { 1191 unsigned MadeChange = false; 1192 for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { 1193 std::vector<CPEntry> &CPEs = CPEntries[i]; 1194 for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { 1195 if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { 1196 RemoveDeadCPEMI(CPEs[j].CPEMI); 1197 CPEs[j].CPEMI = NULL; 1198 MadeChange = true; 1199 } 1200 } 1201 } 1202 return MadeChange; 1203} 1204 1205/// BBIsInRange - Returns true if the distance between specific MI and 1206/// specific BB can fit in MI's displacement field. 1207bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, 1208 unsigned MaxDisp) { 1209 unsigned PCAdj = isThumb ? 4 : 8; 1210 unsigned BrOffset = GetOffsetOf(MI) + PCAdj; 1211 unsigned DestOffset = BBOffsets[DestBB->getNumber()]; 1212 1213 DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber() 1214 << " from BB#" << MI->getParent()->getNumber() 1215 << " max delta=" << MaxDisp 1216 << " from " << GetOffsetOf(MI) << " to " << DestOffset 1217 << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); 1218 1219 if (BrOffset <= DestOffset) { 1220 // Branch before the Dest. 1221 if (DestOffset-BrOffset <= MaxDisp) 1222 return true; 1223 } else { 1224 if (BrOffset-DestOffset <= MaxDisp) 1225 return true; 1226 } 1227 return false; 1228} 1229 1230/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far 1231/// away to fit in its displacement field. 1232bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { 1233 MachineInstr *MI = Br.MI; 1234 MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); 1235 1236 // Check to see if the DestBB is already in-range. 
1237 if (BBIsInRange(MI, DestBB, Br.MaxDisp)) 1238 return false; 1239 1240 if (!Br.isCond) 1241 return FixUpUnconditionalBr(MF, Br); 1242 return FixUpConditionalBr(MF, Br); 1243} 1244 1245/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is 1246/// too far away to fit in its displacement field. If the LR register has been 1247/// spilled in the epilogue, then we can use BL to implement a far jump. 1248/// Otherwise, add an intermediate branch instruction to a branch. 1249bool 1250ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { 1251 MachineInstr *MI = Br.MI; 1252 MachineBasicBlock *MBB = MI->getParent(); 1253 if (!isThumb1) 1254 llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!"); 1255 1256 // Use BL to implement far jump. 1257 Br.MaxDisp = (1 << 21) * 2; 1258 MI->setDesc(TII->get(ARM::tBfar)); 1259 BBSizes[MBB->getNumber()] += 2; 1260 AdjustBBOffsetsAfter(MBB, 2); 1261 HasFarJump = true; 1262 NumUBrFixed++; 1263 1264 DEBUG(errs() << " Changed B to long jump " << *MI); 1265 1266 return true; 1267} 1268 1269/// FixUpConditionalBr - Fix up a conditional branch whose destination is too 1270/// far away to fit in its displacement field. It is converted to an inverse 1271/// conditional branch + an unconditional branch to the destination. 
bool
ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // blt L1
  // =>
  // bge L2
  // b   L1
  // L2:
  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
  CC = ARMCC::getOppositeCondition(CC);
  unsigned CCReg = MI->getOperand(2).getReg();

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI->getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);

  NumCBrFixed++;
  if (BMI != MI) {
    // The conditional branch is not the last instruction in the block.
    if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
        BMI->getOpcode() == Br.UncondBr) {
      // Last MI in the BB is an unconditional branch. Can we simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
      if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
        DEBUG(errs() << "  Invert Bcc condition and swap its destination with "
                     << *BMI);
        // Swap the two branch targets and install the inverted condition;
        // no size or offset changes needed.
        BMI->getOperand(0).setMBB(DestBB);
        MI->getOperand(0).setMBB(NewDest);
        MI->getOperand(1).setImm(CC);
        return true;
      }
    }
  }

  if (NeedSplit) {
    SplitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->GetInstSizeInBytes(&MBB->back());
    BBSizes[MBB->getNumber()] -= delta;
    MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
    AdjustBBOffsetsAfter(SplitBB, -delta);
    MBB->back().eraseFromParent();
    // BBOffsets[SplitBB] is wrong temporarily, fixed below
  }
  MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));

  DEBUG(errs() << "  Insert B to BB#" << DestBB->getNumber()
               << " also invert condition and change dest. to BB#"
               << NextBB->getNumber() << "\n");

  // Insert a new conditional branch and a new unconditional branch.
  // Also update the ImmBranch as well as adding a new entry for the new branch.
  BuildMI(MBB, DebugLoc::getUnknownLoc(),
          TII->get(MI->getOpcode()))
    .addMBB(NextBB).addImm(CC).addReg(CCReg);
  Br.MI = &MBB->back();
  BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
  BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
  BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
  unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
  ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));

  // Remove the old conditional branch.  It may or may not still be in MBB
  // (if the block was split, MI now lives in SplitBB).
  BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
  MI->eraseFromParent();

  // The net size change is an addition of one unconditional branch.
  int delta = TII->GetInstSizeInBytes(&MBB->back());
  AdjustBBOffsetsAfter(MBB, delta);
  return true;
}

/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
/// LR / restores LR to pc. FIXME: This is done here because it's only possible
/// to do this if tBfar is not used.
1360bool ARMConstantIslands::UndoLRSpillRestore() { 1361 bool MadeChange = false; 1362 for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { 1363 MachineInstr *MI = PushPopMIs[i]; 1364 // First two operands are predicates, the third is a zero since there 1365 // is no writeback. 1366 if (MI->getOpcode() == ARM::tPOP_RET && 1367 MI->getOperand(3).getReg() == ARM::PC && 1368 MI->getNumExplicitOperands() == 4) { 1369 BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); 1370 MI->eraseFromParent(); 1371 MadeChange = true; 1372 } 1373 } 1374 return MadeChange; 1375} 1376 1377bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { 1378 bool MadeChange = false; 1379 1380 // Shrink ADR and LDR from constantpool. 1381 for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { 1382 CPUser &U = CPUsers[i]; 1383 unsigned Opcode = U.MI->getOpcode(); 1384 unsigned NewOpc = 0; 1385 unsigned Scale = 1; 1386 unsigned Bits = 0; 1387 switch (Opcode) { 1388 default: break; 1389 case ARM::t2LEApcrel: 1390 if (isARMLowRegister(U.MI->getOperand(0).getReg())) { 1391 NewOpc = ARM::tLEApcrel; 1392 Bits = 8; 1393 Scale = 4; 1394 } 1395 break; 1396 case ARM::t2LDRpci: 1397 if (isARMLowRegister(U.MI->getOperand(0).getReg())) { 1398 NewOpc = ARM::tLDRpci; 1399 Bits = 8; 1400 Scale = 4; 1401 } 1402 break; 1403 } 1404 1405 if (!NewOpc) 1406 continue; 1407 1408 unsigned UserOffset = GetOffsetOf(U.MI) + 4; 1409 unsigned MaxOffs = ((1 << Bits) - 1) * Scale; 1410 // FIXME: Check if offset is multiple of scale if scale is not 4. 
1411 if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { 1412 U.MI->setDesc(TII->get(NewOpc)); 1413 MachineBasicBlock *MBB = U.MI->getParent(); 1414 BBSizes[MBB->getNumber()] -= 2; 1415 AdjustBBOffsetsAfter(MBB, -2); 1416 ++NumT2CPShrunk; 1417 MadeChange = true; 1418 } 1419 } 1420 1421 MadeChange |= OptimizeThumb2Branches(MF); 1422 MadeChange |= OptimizeThumb2JumpTables(MF); 1423 return MadeChange; 1424} 1425 1426bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { 1427 bool MadeChange = false; 1428 1429 for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { 1430 ImmBranch &Br = ImmBranches[i]; 1431 unsigned Opcode = Br.MI->getOpcode(); 1432 unsigned NewOpc = 0; 1433 unsigned Scale = 1; 1434 unsigned Bits = 0; 1435 switch (Opcode) { 1436 default: break; 1437 case ARM::t2B: 1438 NewOpc = ARM::tB; 1439 Bits = 11; 1440 Scale = 2; 1441 break; 1442 case ARM::t2Bcc: 1443 NewOpc = ARM::tBcc; 1444 Bits = 8; 1445 Scale = 2; 1446 break; 1447 } 1448 if (!NewOpc) 1449 continue; 1450 1451 unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; 1452 MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); 1453 if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { 1454 Br.MI->setDesc(TII->get(NewOpc)); 1455 MachineBasicBlock *MBB = Br.MI->getParent(); 1456 BBSizes[MBB->getNumber()] -= 2; 1457 AdjustBBOffsetsAfter(MBB, -2); 1458 ++NumT2BrShrunk; 1459 MadeChange = true; 1460 } 1461 } 1462 1463 return MadeChange; 1464} 1465 1466 1467/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller 1468/// jumptables when it's possible. 1469bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { 1470 bool MadeChange = false; 1471 1472 // FIXME: After the tables are shrunk, can we get rid some of the 1473 // constantpool tables? 
1474 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); 1475 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); 1476 for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { 1477 MachineInstr *MI = T2JumpTables[i]; 1478 const TargetInstrDesc &TID = MI->getDesc(); 1479 unsigned NumOps = TID.getNumOperands(); 1480 unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); 1481 MachineOperand JTOP = MI->getOperand(JTOpIdx); 1482 unsigned JTI = JTOP.getIndex(); 1483 assert(JTI < JT.size()); 1484 1485 bool ByteOk = true; 1486 bool HalfWordOk = true; 1487 unsigned JTOffset = GetOffsetOf(MI) + 4; 1488 const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; 1489 for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { 1490 MachineBasicBlock *MBB = JTBBs[j]; 1491 unsigned DstOffset = BBOffsets[MBB->getNumber()]; 1492 // Negative offset is not ok. FIXME: We should change BB layout to make 1493 // sure all the branches are forward. 1494 if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) 1495 ByteOk = false; 1496 unsigned TBHLimit = ((1<<16)-1)*2; 1497 if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit) 1498 HalfWordOk = false; 1499 if (!ByteOk && !HalfWordOk) 1500 break; 1501 } 1502 1503 if (ByteOk || HalfWordOk) { 1504 MachineBasicBlock *MBB = MI->getParent(); 1505 unsigned BaseReg = MI->getOperand(0).getReg(); 1506 bool BaseRegKill = MI->getOperand(0).isKill(); 1507 if (!BaseRegKill) 1508 continue; 1509 unsigned IdxReg = MI->getOperand(1).getReg(); 1510 bool IdxRegKill = MI->getOperand(1).isKill(); 1511 MachineBasicBlock::iterator PrevI = MI; 1512 if (PrevI == MBB->begin()) 1513 continue; 1514 1515 MachineInstr *AddrMI = --PrevI; 1516 bool OptOk = true; 1517 // Examine the instruction that calculate the jumptable entry address. 1518 // If it's not the one just before the t2BR_JT, we won't delete it, then 1519 // it's not worth doing the optimization. 
1520 for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { 1521 const MachineOperand &MO = AddrMI->getOperand(k); 1522 if (!MO.isReg() || !MO.getReg()) 1523 continue; 1524 if (MO.isDef() && MO.getReg() != BaseReg) { 1525 OptOk = false; 1526 break; 1527 } 1528 if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { 1529 OptOk = false; 1530 break; 1531 } 1532 } 1533 if (!OptOk) 1534 continue; 1535 1536 // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want 1537 // to delete it as well. 1538 MachineInstr *LeaMI = --PrevI; 1539 if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && 1540 LeaMI->getOpcode() != ARM::t2LEApcrelJT) || 1541 LeaMI->getOperand(0).getReg() != BaseReg) 1542 OptOk = false; 1543 1544 if (!OptOk) 1545 continue; 1546 1547 unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH; 1548 MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) 1549 .addReg(IdxReg, getKillRegState(IdxRegKill)) 1550 .addJumpTableIndex(JTI, JTOP.getTargetFlags()) 1551 .addImm(MI->getOperand(JTOpIdx+1).getImm()); 1552 // FIXME: Insert an "ALIGN" instruction to ensure the next instruction 1553 // is 2-byte aligned. For now, asm printer will fix it up. 1554 unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); 1555 unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI); 1556 OrigSize += TII->GetInstSizeInBytes(LeaMI); 1557 OrigSize += TII->GetInstSizeInBytes(MI); 1558 1559 AddrMI->eraseFromParent(); 1560 LeaMI->eraseFromParent(); 1561 MI->eraseFromParent(); 1562 1563 int delta = OrigSize - NewSize; 1564 BBSizes[MBB->getNumber()] -= delta; 1565 AdjustBBOffsetsAfter(MBB, -delta); 1566 1567 ++NumTBs; 1568 MadeChange = true; 1569 } 1570 } 1571 1572 return MadeChange; 1573} 1574