SystemZLongBranch.cpp revision 6824f127f90197b26af93cf5d6c13b7941567e54
1//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass does two things: 11// (1) fuse compares and branches into COMPARE AND BRANCH instructions 12// (2) make sure that all branches are in range. 13// 14// We do (1) here rather than earlier because the fused form prevents 15// predication. 16// 17// Doing it so late makes it more likely that a register will be reused 18// between the compare and the branch, but it isn't clear whether preventing 19// that would be a win or not. 20// 21// There are several ways in which (2) could be done. One aggressive 22// approach is to assume that all branches are in range and successively 23// replace those that turn out not to be in range with a longer form 24// (branch relaxation). A simple implementation is to continually walk 25// through the function relaxing branches until no more changes are 26// needed and a fixed point is reached. However, in the pathological 27// worst case, this implementation is quadratic in the number of blocks; 28// relaxing branch N can make branch N-1 go out of range, which in turn 29// can make branch N-2 go out of range, and so on. 30// 31// An alternative approach is to assume that all branches must be 32// converted to their long forms, then reinstate the short forms of 33// branches that, even under this pessimistic assumption, turn out to be 34// in range (branch shortening). This too can be implemented as a function 35// walk that is repeated until a fixed point is reached. In general, 36// the result of shortening is not as good as that of relaxation, and 37// shortening is also quadratic in the worst case; shortening branch N 38// can bring branch N-1 in range of the short form, which in turn can do 39// the same for branch N-2, and so on. The main advantage of shortening 40// is that each walk through the function produces valid code, so it is 41// possible to stop at any point after the first walk. The quadraticness 42// could therefore be handled with a maximum pass count, although the 43// question then becomes: what maximum count should be used? 44// 45// On SystemZ, long branches are only needed for functions bigger than 64k, 46// which are relatively rare to begin with, and the long branch sequences 47// are actually relatively cheap. It therefore doesn't seem worth spending 48// much compilation time on the problem. Instead, the approach we take is: 49// 50// (1) Work out the address that each block would have if no branches 51// need relaxing. Exit the pass early if all branches are in range 52// according to this assumption. 53// 54// (2) Work out the address that each block would have if all branches 55// need relaxing. 56// 57// (3) Walk through the block calculating the final address of each instruction 58// and relaxing those that need to be relaxed. For backward branches, 59// this check uses the final address of the target block, as calculated 60// earlier in the walk. For forward branches, this check uses the 61// address of the target block that was calculated in (2). Both checks 62// give a conservatively-correct range. 63// 64//===----------------------------------------------------------------------===// 65 66#define DEBUG_TYPE "systemz-long-branch" 67 68#include "SystemZTargetMachine.h" 69#include "llvm/ADT/Statistic.h" 70#include "llvm/CodeGen/MachineFunctionPass.h" 71#include "llvm/CodeGen/MachineInstrBuilder.h" 72#include "llvm/IR/Function.h" 73#include "llvm/Support/CommandLine.h" 74#include "llvm/Support/MathExtras.h" 75#include "llvm/Target/TargetInstrInfo.h" 76#include "llvm/Target/TargetMachine.h" 77#include "llvm/Target/TargetRegisterInfo.h" 78 79using namespace llvm; 80 81STATISTIC(LongBranches, "Number of long branches."); 82 83namespace { 84 typedef MachineBasicBlock::iterator Iter; 85 86 // Represents positional information about a basic block. 87 struct MBBInfo { 88 // The address that we currently assume the block has. 89 uint64_t Address; 90 91 // The size of the block in bytes, excluding terminators. 92 // This value never changes. 93 uint64_t Size; 94 95 // The minimum alignment of the block, as a log2 value. 96 // This value never changes. 97 unsigned Alignment; 98 99 // The number of terminators in this block. This value never changes. 100 unsigned NumTerminators; 101 102 MBBInfo() 103 : Address(0), Size(0), Alignment(0), NumTerminators(0) {} 104 }; 105 106 // Represents the state of a block terminator. 107 struct TerminatorInfo { 108 // If this terminator is a relaxable branch, this points to the branch 109 // instruction, otherwise it is null. 110 MachineInstr *Branch; 111 112 // The address that we currently assume the terminator has. 113 uint64_t Address; 114 115 // The current size of the terminator in bytes. 116 uint64_t Size; 117 118 // If Branch is nonnull, this is the number of the target block, 119 // otherwise it is unused. 120 unsigned TargetBlock; 121 122 // If Branch is nonnull, this is the length of the longest relaxed form, 123 // otherwise it is zero. 124 unsigned ExtraRelaxSize; 125 126 TerminatorInfo() : Branch(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {} 127 }; 128 129 // Used to keep track of the current position while iterating over the blocks. 130 struct BlockPosition { 131 // The address that we assume this position has. 132 uint64_t Address; 133 134 // The number of low bits in Address that are known to be the same 135 // as the runtime address. 136 unsigned KnownBits; 137 138 BlockPosition(unsigned InitialAlignment) 139 : Address(0), KnownBits(InitialAlignment) {} 140 }; 141 142 class SystemZLongBranch : public MachineFunctionPass { 143 public: 144 static char ID; 145 SystemZLongBranch(const SystemZTargetMachine &tm) 146 : MachineFunctionPass(ID), TII(0) {} 147 148 virtual const char *getPassName() const { 149 return "SystemZ Long Branch"; 150 } 151 152 bool runOnMachineFunction(MachineFunction &F); 153 154 private: 155 void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); 156 void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, 157 bool AssumeRelaxed); 158 TerminatorInfo describeTerminator(MachineInstr *MI); 159 bool fuseCompareAndBranch(MachineInstr *Compare); 160 uint64_t initMBBInfo(); 161 bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); 162 bool mustRelaxABranch(); 163 void setWorstCaseAddresses(); 164 void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode); 165 void relaxBranch(TerminatorInfo &Terminator); 166 void relaxBranches(); 167 168 const SystemZInstrInfo *TII; 169 MachineFunction *MF; 170 SmallVector<MBBInfo, 16> MBBs; 171 SmallVector<TerminatorInfo, 16> Terminators; 172 }; 173 174 char SystemZLongBranch::ID = 0; 175 176 const uint64_t MaxBackwardRange = 0x10000; 177 const uint64_t MaxForwardRange = 0xfffe; 178} // end of anonymous namespace 179 180FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { 181 return new SystemZLongBranch(TM); 182} 183 184// Position describes the state immediately before Block. Update Block 185// accordingly and move Position to the end of the block's non-terminator 186// instructions. 187void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, 188 MBBInfo &Block) { 189 if (Block.Alignment > Position.KnownBits) { 190 // When calculating the address of Block, we need to conservatively 191 // assume that Block had the worst possible misalignment. 192 Position.Address += ((uint64_t(1) << Block.Alignment) - 193 (uint64_t(1) << Position.KnownBits)); 194 Position.KnownBits = Block.Alignment; 195 } 196 197 // Align the addresses. 198 uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1; 199 Position.Address = (Position.Address + AlignMask) & ~AlignMask; 200 201 // Record the block's position. 202 Block.Address = Position.Address; 203 204 // Move past the non-terminators in the block. 205 Position.Address += Block.Size; 206} 207 208// Position describes the state immediately before Terminator. 209// Update Terminator accordingly and move Position past it. 210// Assume that Terminator will be relaxed if AssumeRelaxed. 211void SystemZLongBranch::skipTerminator(BlockPosition &Position, 212 TerminatorInfo &Terminator, 213 bool AssumeRelaxed) { 214 Terminator.Address = Position.Address; 215 Position.Address += Terminator.Size; 216 if (AssumeRelaxed) 217 Position.Address += Terminator.ExtraRelaxSize; 218} 219 220// Return a description of terminator instruction MI. 221TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { 222 TerminatorInfo Terminator; 223 Terminator.Size = TII->getInstSizeInBytes(MI); 224 if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) { 225 switch (MI->getOpcode()) { 226 case SystemZ::J: 227 // Relaxes to JG, which is 2 bytes longer. 228 Terminator.ExtraRelaxSize = 2; 229 break; 230 case SystemZ::BRC: 231 // Relaxes to BRCL, which is 2 bytes longer. 232 Terminator.ExtraRelaxSize = 2; 233 break; 234 case SystemZ::CRJ: 235 // Relaxes to a CR/BRCL sequence, which is 2 bytes longer. 236 Terminator.ExtraRelaxSize = 2; 237 break; 238 case SystemZ::CGRJ: 239 // Relaxes to a CGR/BRCL sequence, which is 4 bytes longer. 240 Terminator.ExtraRelaxSize = 4; 241 break; 242 case SystemZ::CIJ: 243 case SystemZ::CGIJ: 244 // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer. 245 Terminator.ExtraRelaxSize = 4; 246 break; 247 default: 248 llvm_unreachable("Unrecognized branch instruction"); 249 } 250 Terminator.Branch = MI; 251 Terminator.TargetBlock = 252 TII->getBranchInfo(MI).Target->getMBB()->getNumber(); 253 } 254 return Terminator; 255} 256 257// Return true if CC is live after MBBI. 258static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, 259 const TargetRegisterInfo *TRI) { 260 if (MBBI->killsRegister(SystemZ::CC, TRI)) 261 return false; 262 263 MachineBasicBlock *MBB = MBBI->getParent(); 264 MachineBasicBlock::iterator MBBE = MBB->end(); 265 for (++MBBI; MBBI != MBBE; ++MBBI) { 266 if (MBBI->readsRegister(SystemZ::CC, TRI)) 267 return true; 268 if (MBBI->definesRegister(SystemZ::CC, TRI)) 269 return false; 270 } 271 272 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 273 SE = MBB->succ_end(); SI != SE; ++SI) 274 if ((*SI)->isLiveIn(SystemZ::CC)) 275 return true; 276 277 return false; 278} 279 280// Try to fuse compare instruction Compare into a later branch. Return 281// true on success and if Compare is therefore redundant. 282bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) { 283 if (MF->getTarget().getOptLevel() == CodeGenOpt::None) 284 return false; 285 286 unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), 287 Compare); 288 if (!FusedOpcode) 289 return false; 290 291 unsigned SrcReg = Compare->getOperand(0).getReg(); 292 unsigned SrcReg2 = (Compare->getOperand(1).isReg() ? 293 Compare->getOperand(1).getReg() : 0); 294 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 295 MachineBasicBlock *MBB = Compare->getParent(); 296 MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end(); 297 for (++MBBI; MBBI != MBBE; ++MBBI) { 298 if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) { 299 // Read the branch mask and target. 300 MachineOperand CCMask(MBBI->getOperand(1)); 301 MachineOperand Target(MBBI->getOperand(2)); 302 assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && 303 "Invalid condition-code mask for integer comparison"); 304 305 // Clear out all current operands. 306 int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); 307 assert(CCUse >= 0 && "BRC must use CC"); 308 MBBI->RemoveOperand(CCUse); 309 MBBI->RemoveOperand(2); 310 MBBI->RemoveOperand(1); 311 MBBI->RemoveOperand(0); 312 313 // Rebuild MBBI as a fused compare and branch. 314 MBBI->setDesc(TII->get(FusedOpcode)); 315 MachineInstrBuilder(*MBB->getParent(), MBBI) 316 .addOperand(Compare->getOperand(0)) 317 .addOperand(Compare->getOperand(1)) 318 .addOperand(CCMask) 319 .addOperand(Target); 320 321 // Clear any intervening kills of SrcReg and SrcReg2. 322 MBBI = Compare; 323 for (++MBBI; MBBI != MBBE; ++MBBI) { 324 MBBI->clearRegisterKills(SrcReg, TRI); 325 if (SrcReg2) 326 MBBI->clearRegisterKills(SrcReg2, TRI); 327 } 328 return true; 329 } 330 331 // Stop if we find another reference to CC before a branch. 332 if (MBBI->readsRegister(SystemZ::CC, TRI) || 333 MBBI->modifiesRegister(SystemZ::CC, TRI)) 334 return false; 335 336 // Stop if we find another assignment to the registers before the branch. 337 if (MBBI->modifiesRegister(SrcReg, TRI) || 338 (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) 339 return false; 340 } 341 return false; 342} 343 344// Fill MBBs and Terminators, setting the addresses on the assumption 345// that no branches need relaxation. Return the size of the function under 346// this assumption. 347uint64_t SystemZLongBranch::initMBBInfo() { 348 MF->RenumberBlocks(); 349 unsigned NumBlocks = MF->size(); 350 351 MBBs.clear(); 352 MBBs.resize(NumBlocks); 353 354 Terminators.clear(); 355 Terminators.reserve(NumBlocks); 356 357 BlockPosition Position(MF->getAlignment()); 358 for (unsigned I = 0; I < NumBlocks; ++I) { 359 MachineBasicBlock *MBB = MF->getBlockNumbered(I); 360 MBBInfo &Block = MBBs[I]; 361 362 // Record the alignment, for quick access. 363 Block.Alignment = MBB->getAlignment(); 364 365 // Calculate the size of the fixed part of the block. 366 MachineBasicBlock::iterator MI = MBB->begin(); 367 MachineBasicBlock::iterator End = MBB->end(); 368 while (MI != End && !MI->isTerminator()) { 369 MachineInstr *Current = MI; 370 ++MI; 371 if (Current->isCompare() && fuseCompareAndBranch(Current)) 372 Current->removeFromParent(); 373 else 374 Block.Size += TII->getInstSizeInBytes(Current); 375 } 376 skipNonTerminators(Position, Block); 377 378 // Add the terminators. 379 while (MI != End) { 380 if (!MI->isDebugValue()) { 381 assert(MI->isTerminator() && "Terminator followed by non-terminator"); 382 Terminators.push_back(describeTerminator(MI)); 383 skipTerminator(Position, Terminators.back(), false); 384 ++Block.NumTerminators; 385 } 386 ++MI; 387 } 388 } 389 390 return Position.Address; 391} 392 393// Return true if, under current assumptions, Terminator would need to be 394// relaxed if it were placed at address Address. 395bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, 396 uint64_t Address) { 397 if (!Terminator.Branch) 398 return false; 399 400 const MBBInfo &Target = MBBs[Terminator.TargetBlock]; 401 if (Address >= Target.Address) { 402 if (Address - Target.Address <= MaxBackwardRange) 403 return false; 404 } else { 405 if (Target.Address - Address <= MaxForwardRange) 406 return false; 407 } 408 409 return true; 410} 411 412// Return true if, under current assumptions, any terminator needs 413// to be relaxed. 414bool SystemZLongBranch::mustRelaxABranch() { 415 for (SmallVectorImpl<TerminatorInfo>::iterator TI = Terminators.begin(), 416 TE = Terminators.end(); TI != TE; ++TI) 417 if (mustRelaxBranch(*TI, TI->Address)) 418 return true; 419 return false; 420} 421 422// Set the address of each block on the assumption that all branches 423// must be long. 424void SystemZLongBranch::setWorstCaseAddresses() { 425 SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); 426 BlockPosition Position(MF->getAlignment()); 427 for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end(); 428 BI != BE; ++BI) { 429 skipNonTerminators(Position, *BI); 430 for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) { 431 skipTerminator(Position, *TI, true); 432 ++TI; 433 } 434 } 435} 436 437// Split MI into the comparison given by CompareOpcode followed 438// a BRCL on the result. 439void SystemZLongBranch::splitCompareBranch(MachineInstr *MI, 440 unsigned CompareOpcode) { 441 MachineBasicBlock *MBB = MI->getParent(); 442 DebugLoc DL = MI->getDebugLoc(); 443 BuildMI(*MBB, MI, DL, TII->get(CompareOpcode)) 444 .addOperand(MI->getOperand(0)) 445 .addOperand(MI->getOperand(1)); 446 MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL)) 447 .addImm(SystemZ::CCMASK_ICMP) 448 .addOperand(MI->getOperand(2)) 449 .addOperand(MI->getOperand(3)); 450 // The implicit use of CC is a killing use. 451 BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo()); 452 MI->eraseFromParent(); 453} 454 455// Relax the branch described by Terminator. 456void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { 457 MachineInstr *Branch = Terminator.Branch; 458 switch (Branch->getOpcode()) { 459 case SystemZ::J: 460 Branch->setDesc(TII->get(SystemZ::JG)); 461 break; 462 case SystemZ::BRC: 463 Branch->setDesc(TII->get(SystemZ::BRCL)); 464 break; 465 case SystemZ::CRJ: 466 splitCompareBranch(Branch, SystemZ::CR); 467 break; 468 case SystemZ::CGRJ: 469 splitCompareBranch(Branch, SystemZ::CGR); 470 break; 471 case SystemZ::CIJ: 472 splitCompareBranch(Branch, SystemZ::CHI); 473 break; 474 case SystemZ::CGIJ: 475 splitCompareBranch(Branch, SystemZ::CGHI); 476 break; 477 default: 478 llvm_unreachable("Unrecognized branch"); 479 } 480 481 Terminator.Size += Terminator.ExtraRelaxSize; 482 Terminator.ExtraRelaxSize = 0; 483 Terminator.Branch = 0; 484 485 ++LongBranches; 486} 487 488// Run a shortening pass and relax any branches that need to be relaxed. 489void SystemZLongBranch::relaxBranches() { 490 SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); 491 BlockPosition Position(MF->getAlignment()); 492 for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end(); 493 BI != BE; ++BI) { 494 skipNonTerminators(Position, *BI); 495 for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) { 496 assert(Position.Address <= TI->Address && 497 "Addresses shouldn't go forwards"); 498 if (mustRelaxBranch(*TI, Position.Address)) 499 relaxBranch(*TI); 500 skipTerminator(Position, *TI, false); 501 ++TI; 502 } 503 } 504} 505 506bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { 507 TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo()); 508 MF = &F; 509 uint64_t Size = initMBBInfo(); 510 if (Size <= MaxForwardRange || !mustRelaxABranch()) 511 return false; 512 513 setWorstCaseAddresses(); 514 relaxBranches(); 515 return true; 516} 517