1//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the AArch64ConditionalCompares pass which reduces 11// branching and code size by using the conditional compare instructions CCMP, 12// CCMN, and FCMP. 13// 14// The CFG transformations for forming conditional compares are very similar to 15// if-conversion, and this pass should run immediately before the early 16// if-conversion pass. 17// 18//===----------------------------------------------------------------------===// 19 20#include "AArch64.h" 21#include "llvm/ADT/BitVector.h" 22#include "llvm/ADT/DepthFirstIterator.h" 23#include "llvm/ADT/SetVector.h" 24#include "llvm/ADT/SmallPtrSet.h" 25#include "llvm/ADT/SparseSet.h" 26#include "llvm/ADT/Statistic.h" 27#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 28#include "llvm/CodeGen/MachineDominators.h" 29#include "llvm/CodeGen/MachineFunction.h" 30#include "llvm/CodeGen/MachineFunctionPass.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineLoopInfo.h" 33#include "llvm/CodeGen/MachineRegisterInfo.h" 34#include "llvm/CodeGen/MachineTraceMetrics.h" 35#include "llvm/CodeGen/Passes.h" 36#include "llvm/Support/CommandLine.h" 37#include "llvm/Support/Debug.h" 38#include "llvm/Support/raw_ostream.h" 39#include "llvm/Target/TargetInstrInfo.h" 40#include "llvm/Target/TargetRegisterInfo.h" 41#include "llvm/Target/TargetSubtargetInfo.h" 42 43using namespace llvm; 44 45#define DEBUG_TYPE "aarch64-ccmp" 46 47// Absolute maximum number of instructions allowed per speculated block. 48// This bypasses all other heuristics, so it should be set fairly high. 49static cl::opt<unsigned> BlockInstrLimit( 50 "aarch64-ccmp-limit", cl::init(30), cl::Hidden, 51 cl::desc("Maximum number of instructions per speculated block.")); 52 53// Stress testing mode - disable heuristics. 54static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden, 55 cl::desc("Turn all knobs to 11")); 56 57STATISTIC(NumConsidered, "Number of ccmps considered"); 58STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)"); 59STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)"); 60STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)"); 61STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)"); 62STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)"); 63STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)"); 64STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)"); 65STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)"); 66STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)"); 67STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)"); 68 69STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)"); 70 71STATISTIC(NumConverted, "Number of ccmp instructions created"); 72STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); 73 74//===----------------------------------------------------------------------===// 75// SSACCmpConv 76//===----------------------------------------------------------------------===// 77// 78// The SSACCmpConv class performs ccmp-conversion on SSA form machine code 79// after determining if it is possible. The class contains no heuristics; 80// external code should be used to determine when ccmp-conversion is a good 81// idea. 82// 83// CCmp-formation works on a CFG representing chained conditions, typically 84// from C's short-circuit || and && operators: 85// 86// From: Head To: Head 87// / | CmpBB 88// / | / | 89// | CmpBB / | 90// | / | Tail | 91// | / | | | 92// Tail | | | 93// | | | | 94// ... ... ... ... 95// 96// The Head block is terminated by a br.cond instruction, and the CmpBB block 97// contains compare + br.cond. Tail must be a successor of both. 98// 99// The cmp-conversion turns the compare instruction in CmpBB into a conditional 100// compare, and merges CmpBB into Head, speculatively executing its 101// instructions. The AArch64 conditional compare instructions have an immediate 102// operand that specifies the NZCV flag values when the condition is false and 103// the compare isn't executed. This makes it possible to chain compares with 104// different condition codes. 105// 106// Example: 107// 108// if (a == 5 || b == 17) 109// foo(); 110// 111// Head: 112// cmp w0, #5 113// b.eq Tail 114// CmpBB: 115// cmp w1, #17 116// b.eq Tail 117// ... 118// Tail: 119// bl _foo 120// 121// Becomes: 122// 123// Head: 124// cmp w0, #5 125// ccmp w1, #17, 4, ne ; 4 = nZcv 126// b.eq Tail 127// ... 128// Tail: 129// bl _foo 130// 131// The ccmp condition code is the one that would cause the Head terminator to 132// branch to CmpBB. 133// 134// FIXME: It should also be possible to speculate a block on the critical edge 135// between Head and Tail, just like if-converting a diamond. 136// 137// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion). 138 139namespace { 140class SSACCmpConv { 141 MachineFunction *MF; 142 const TargetInstrInfo *TII; 143 const TargetRegisterInfo *TRI; 144 MachineRegisterInfo *MRI; 145 146public: 147 /// The first block containing a conditional branch, dominating everything 148 /// else. 149 MachineBasicBlock *Head; 150 151 /// The block containing cmp+br.cond with a successor shared with Head. 152 MachineBasicBlock *CmpBB; 153 154 /// The common successor for Head and CmpBB. 155 MachineBasicBlock *Tail; 156 157 /// The compare instruction in CmpBB that can be converted to a ccmp. 158 MachineInstr *CmpMI; 159 160private: 161 /// The branch condition in Head as determined by AnalyzeBranch. 162 SmallVector<MachineOperand, 4> HeadCond; 163 164 /// The condition code that makes Head branch to CmpBB. 165 AArch64CC::CondCode HeadCmpBBCC; 166 167 /// The branch condition in CmpBB. 168 SmallVector<MachineOperand, 4> CmpBBCond; 169 170 /// The condition code that makes CmpBB branch to Tail. 171 AArch64CC::CondCode CmpBBTailCC; 172 173 /// Check if the Tail PHIs are trivially convertible. 174 bool trivialTailPHIs(); 175 176 /// Remove CmpBB from the Tail PHIs. 177 void updateTailPHIs(); 178 179 /// Check if an operand defining DstReg is dead. 180 bool isDeadDef(unsigned DstReg); 181 182 /// Find the compare instruction in MBB that controls the conditional branch. 183 /// Return NULL if a convertible instruction can't be found. 184 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB); 185 186 /// Return true if all non-terminator instructions in MBB can be safely 187 /// speculated. 188 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI); 189 190public: 191 /// runOnMachineFunction - Initialize per-function data structures. 192 void runOnMachineFunction(MachineFunction &MF) { 193 this->MF = &MF; 194 TII = MF.getTarget().getInstrInfo(); 195 TRI = MF.getTarget().getRegisterInfo(); 196 MRI = &MF.getRegInfo(); 197 } 198 199 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the 200 /// internal state, and return true. 201 bool canConvert(MachineBasicBlock *MBB); 202 203 /// Cmo-convert the last block passed to canConvertCmp(), assuming 204 /// it is possible. Add any erased blocks to RemovedBlocks. 205 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks); 206 207 /// Return the expected code size delta if the conversion into a 208 /// conditional compare is performed. 209 int expectedCodeSizeDelta() const; 210}; 211} // end anonymous namespace 212 213// Check that all PHIs in Tail are selecting the same value from Head and CmpBB. 214// This means that no if-conversion is required when merging CmpBB into Head. 215bool SSACCmpConv::trivialTailPHIs() { 216 for (auto &I : *Tail) { 217 if (!I.isPHI()) 218 break; 219 unsigned HeadReg = 0, CmpBBReg = 0; 220 // PHI operands come in (VReg, MBB) pairs. 221 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) { 222 MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB(); 223 unsigned Reg = I.getOperand(oi).getReg(); 224 if (MBB == Head) { 225 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands"); 226 HeadReg = Reg; 227 } 228 if (MBB == CmpBB) { 229 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands"); 230 CmpBBReg = Reg; 231 } 232 } 233 if (HeadReg != CmpBBReg) 234 return false; 235 } 236 return true; 237} 238 239// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply 240// removing the CmpBB operands. The Head operands will be identical. 241void SSACCmpConv::updateTailPHIs() { 242 for (auto &I : *Tail) { 243 if (!I.isPHI()) 244 break; 245 // I is a PHI. It can have multiple entries for CmpBB. 246 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) { 247 // PHI operands are (Reg, MBB) at (oi-2, oi-1). 248 if (I.getOperand(oi - 1).getMBB() == CmpBB) { 249 I.RemoveOperand(oi - 1); 250 I.RemoveOperand(oi - 2); 251 } 252 } 253 } 254} 255 256// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares 257// are still writing virtual registers without any uses. 258bool SSACCmpConv::isDeadDef(unsigned DstReg) { 259 // Writes to the zero register are dead. 260 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) 261 return true; 262 if (!TargetRegisterInfo::isVirtualRegister(DstReg)) 263 return false; 264 // A virtual register def without any uses will be marked dead later, and 265 // eventually replaced by the zero register. 266 return MRI->use_nodbg_empty(DstReg); 267} 268 269// Parse a condition code returned by AnalyzeBranch, and compute the CondCode 270// corresponding to TBB. 271// Return 272static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) { 273 // A normal br.cond simply has the condition code. 274 if (Cond[0].getImm() != -1) { 275 assert(Cond.size() == 1 && "Unknown Cond array format"); 276 CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 277 return true; 278 } 279 // For tbz and cbz instruction, the opcode is next. 280 switch (Cond[1].getImm()) { 281 default: 282 // This includes tbz / tbnz branches which can't be converted to 283 // ccmp + br.cond. 284 return false; 285 case AArch64::CBZW: 286 case AArch64::CBZX: 287 assert(Cond.size() == 3 && "Unknown Cond array format"); 288 CC = AArch64CC::EQ; 289 return true; 290 case AArch64::CBNZW: 291 case AArch64::CBNZX: 292 assert(Cond.size() == 3 && "Unknown Cond array format"); 293 CC = AArch64CC::NE; 294 return true; 295 } 296} 297 298MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { 299 MachineBasicBlock::iterator I = MBB->getFirstTerminator(); 300 if (I == MBB->end()) 301 return nullptr; 302 // The terminator must be controlled by the flags. 303 if (!I->readsRegister(AArch64::NZCV)) { 304 switch (I->getOpcode()) { 305 case AArch64::CBZW: 306 case AArch64::CBZX: 307 case AArch64::CBNZW: 308 case AArch64::CBNZX: 309 // These can be converted into a ccmp against #0. 310 return I; 311 } 312 ++NumCmpTermRejs; 313 DEBUG(dbgs() << "Flags not used by terminator: " << *I); 314 return nullptr; 315 } 316 317 // Now find the instruction controlling the terminator. 318 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { 319 --I; 320 assert(!I->isTerminator() && "Spurious terminator"); 321 switch (I->getOpcode()) { 322 // cmp is an alias for subs with a dead destination register. 323 case AArch64::SUBSWri: 324 case AArch64::SUBSXri: 325 // cmn is an alias for adds with a dead destination register. 326 case AArch64::ADDSWri: 327 case AArch64::ADDSXri: 328 // Check that the immediate operand is within range, ccmp wants a uimm5. 329 // Rd = SUBSri Rn, imm, shift 330 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { 331 DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I); 332 ++NumImmRangeRejs; 333 return nullptr; 334 } 335 // Fall through. 336 case AArch64::SUBSWrr: 337 case AArch64::SUBSXrr: 338 case AArch64::ADDSWrr: 339 case AArch64::ADDSXrr: 340 if (isDeadDef(I->getOperand(0).getReg())) 341 return I; 342 DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); 343 ++NumLiveDstRejs; 344 return nullptr; 345 case AArch64::FCMPSrr: 346 case AArch64::FCMPDrr: 347 case AArch64::FCMPESrr: 348 case AArch64::FCMPEDrr: 349 return I; 350 } 351 352 // Check for flag reads and clobbers. 353 MIOperands::PhysRegInfo PRI = 354 MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); 355 356 if (PRI.Reads) { 357 // The ccmp doesn't produce exactly the same flags as the original 358 // compare, so reject the transform if there are uses of the flags 359 // besides the terminators. 360 DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I); 361 ++NumMultNZCVUses; 362 return nullptr; 363 } 364 365 if (PRI.Clobbers) { 366 DEBUG(dbgs() << "Not convertible compare: " << *I); 367 ++NumUnknNZCVDefs; 368 return nullptr; 369 } 370 } 371 DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); 372 return nullptr; 373} 374 375/// Determine if all the instructions in MBB can safely 376/// be speculated. The terminators are not considered. 377/// 378/// Only CmpMI is allowed to clobber the flags. 379/// 380bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, 381 const MachineInstr *CmpMI) { 382 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to 383 // get right. 384 if (!MBB->livein_empty()) { 385 DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); 386 return false; 387 } 388 389 unsigned InstrCount = 0; 390 391 // Check all instructions, except the terminators. It is assumed that 392 // terminators never have side effects or define any used register values. 393 for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) { 394 if (I.isDebugValue()) 395 continue; 396 397 if (++InstrCount > BlockInstrLimit && !Stress) { 398 DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " 399 << BlockInstrLimit << " instructions.\n"); 400 return false; 401 } 402 403 // There shouldn't normally be any phis in a single-predecessor block. 404 if (I.isPHI()) { 405 DEBUG(dbgs() << "Can't hoist: " << I); 406 return false; 407 } 408 409 // Don't speculate loads. Note that it may be possible and desirable to 410 // speculate GOT or constant pool loads that are guaranteed not to trap, 411 // but we don't support that for now. 412 if (I.mayLoad()) { 413 DEBUG(dbgs() << "Won't speculate load: " << I); 414 return false; 415 } 416 417 // We never speculate stores, so an AA pointer isn't necessary. 418 bool DontMoveAcrossStore = true; 419 if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) { 420 DEBUG(dbgs() << "Can't speculate: " << I); 421 return false; 422 } 423 424 // Only CmpMI is allowed to clobber the flags. 425 if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) { 426 DEBUG(dbgs() << "Clobbers flags: " << I); 427 return false; 428 } 429 } 430 return true; 431} 432 433/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential 434/// candidate for cmp-conversion. Fill out the internal state. 435/// 436bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { 437 Head = MBB; 438 Tail = CmpBB = nullptr; 439 440 if (Head->succ_size() != 2) 441 return false; 442 MachineBasicBlock *Succ0 = Head->succ_begin()[0]; 443 MachineBasicBlock *Succ1 = Head->succ_begin()[1]; 444 445 // CmpBB can only have a single predecessor. Tail is allowed many. 446 if (Succ0->pred_size() != 1) 447 std::swap(Succ0, Succ1); 448 449 // Succ0 is our candidate for CmpBB. 450 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2) 451 return false; 452 453 CmpBB = Succ0; 454 Tail = Succ1; 455 456 if (!CmpBB->isSuccessor(Tail)) 457 return false; 458 459 // The CFG topology checks out. 460 DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#" 461 << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n'); 462 ++NumConsidered; 463 464 // Tail is allowed to have many predecessors, but we can't handle PHIs yet. 465 // 466 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are 467 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should 468 // always be safe to sink the ccmp down to immediately before the CmpBB 469 // terminators. 470 if (!trivialTailPHIs()) { 471 DEBUG(dbgs() << "Can't handle phis in Tail.\n"); 472 ++NumPhiRejs; 473 return false; 474 } 475 476 if (!Tail->livein_empty()) { 477 DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n"); 478 ++NumPhysRejs; 479 return false; 480 } 481 482 // CmpBB should never have PHIs since Head is its only predecessor. 483 // FIXME: Clean them up if it happens. 484 if (!CmpBB->empty() && CmpBB->front().isPHI()) { 485 DEBUG(dbgs() << "Can't handle phis in CmpBB.\n"); 486 ++NumPhi2Rejs; 487 return false; 488 } 489 490 if (!CmpBB->livein_empty()) { 491 DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n"); 492 ++NumPhysRejs; 493 return false; 494 } 495 496 // The branch we're looking to eliminate must be analyzable. 497 HeadCond.clear(); 498 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 499 if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) { 500 DEBUG(dbgs() << "Head branch not analyzable.\n"); 501 ++NumHeadBranchRejs; 502 return false; 503 } 504 505 // This is weird, probably some sort of degenerate CFG, or an edge to a 506 // landing pad. 507 if (!TBB || HeadCond.empty()) { 508 DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n"); 509 ++NumHeadBranchRejs; 510 return false; 511 } 512 513 if (!parseCond(HeadCond, HeadCmpBBCC)) { 514 DEBUG(dbgs() << "Unsupported branch type on Head\n"); 515 ++NumHeadBranchRejs; 516 return false; 517 } 518 519 // Make sure the branch direction is right. 520 if (TBB != CmpBB) { 521 assert(TBB == Tail && "Unexpected TBB"); 522 HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC); 523 } 524 525 CmpBBCond.clear(); 526 TBB = FBB = nullptr; 527 if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) { 528 DEBUG(dbgs() << "CmpBB branch not analyzable.\n"); 529 ++NumCmpBranchRejs; 530 return false; 531 } 532 533 if (!TBB || CmpBBCond.empty()) { 534 DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n"); 535 ++NumCmpBranchRejs; 536 return false; 537 } 538 539 if (!parseCond(CmpBBCond, CmpBBTailCC)) { 540 DEBUG(dbgs() << "Unsupported branch type on CmpBB\n"); 541 ++NumCmpBranchRejs; 542 return false; 543 } 544 545 if (TBB != Tail) 546 CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC); 547 548 DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC) 549 << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC) 550 << '\n'); 551 552 CmpMI = findConvertibleCompare(CmpBB); 553 if (!CmpMI) 554 return false; 555 556 if (!canSpeculateInstrs(CmpBB, CmpMI)) { 557 ++NumSpeculateRejs; 558 return false; 559 } 560 return true; 561} 562 563void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { 564 DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#" 565 << Head->getNumber() << ":\n" << *CmpBB); 566 567 // All CmpBB instructions are moved into Head, and CmpBB is deleted. 568 // Update the CFG first. 569 updateTailPHIs(); 570 Head->removeSuccessor(CmpBB); 571 CmpBB->removeSuccessor(Tail); 572 Head->transferSuccessorsAndUpdatePHIs(CmpBB); 573 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); 574 TII->RemoveBranch(*Head); 575 576 // If the Head terminator was one of the cbz / tbz branches with built-in 577 // compare, we need to insert an explicit compare instruction in its place. 578 if (HeadCond[0].getImm() == -1) { 579 ++NumCompBranches; 580 unsigned Opc = 0; 581 switch (HeadCond[1].getImm()) { 582 case AArch64::CBZW: 583 case AArch64::CBNZW: 584 Opc = AArch64::SUBSWri; 585 break; 586 case AArch64::CBZX: 587 case AArch64::CBNZX: 588 Opc = AArch64::SUBSXri; 589 break; 590 default: 591 llvm_unreachable("Cannot convert Head branch"); 592 } 593 const MCInstrDesc &MCID = TII->get(Opc); 594 // Create a dummy virtual register for the SUBS def. 595 unsigned DestReg = 596 MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); 597 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. 598 BuildMI(*Head, Head->end(), TermDL, MCID) 599 .addReg(DestReg, RegState::Define | RegState::Dead) 600 .addOperand(HeadCond[2]) 601 .addImm(0) 602 .addImm(0); 603 // SUBS uses the GPR*sp register classes. 604 MRI->constrainRegClass(HeadCond[2].getReg(), 605 TII->getRegClass(MCID, 1, TRI, *MF)); 606 } 607 608 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); 609 610 // Now replace CmpMI with a ccmp instruction that also considers the incoming 611 // flags. 612 unsigned Opc = 0; 613 unsigned FirstOp = 1; // First CmpMI operand to copy. 614 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction. 615 switch (CmpMI->getOpcode()) { 616 default: 617 llvm_unreachable("Unknown compare opcode"); 618 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break; 619 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break; 620 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break; 621 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break; 622 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break; 623 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break; 624 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break; 625 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break; 626 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break; 627 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break; 628 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break; 629 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break; 630 case AArch64::CBZW: 631 case AArch64::CBNZW: 632 Opc = AArch64::CCMPWi; 633 FirstOp = 0; 634 isZBranch = true; 635 break; 636 case AArch64::CBZX: 637 case AArch64::CBNZX: 638 Opc = AArch64::CCMPXi; 639 FirstOp = 0; 640 isZBranch = true; 641 break; 642 } 643 644 // The ccmp instruction should set the flags according to the comparison when 645 // Head would have branched to CmpBB. 646 // The NZCV immediate operand should provide flags for the case where Head 647 // would have branched to Tail. These flags should cause the new Head 648 // terminator to branch to tail. 649 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); 650 const MCInstrDesc &MCID = TII->get(Opc); 651 MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), 652 TII->getRegClass(MCID, 0, TRI, *MF)); 653 if (CmpMI->getOperand(FirstOp + 1).isReg()) 654 MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), 655 TII->getRegClass(MCID, 1, TRI, *MF)); 656 MachineInstrBuilder MIB = 657 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) 658 .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn 659 if (isZBranch) 660 MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0 661 else 662 MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate 663 MIB.addImm(NZCV).addImm(HeadCmpBBCC); 664 665 // If CmpMI was a terminator, we need a new conditional branch to replace it. 666 // This now becomes a Head terminator. 667 if (isZBranch) { 668 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW || 669 CmpMI->getOpcode() == AArch64::CBNZX; 670 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc)) 671 .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ) 672 .addOperand(CmpMI->getOperand(1)); // Branch target. 673 } 674 CmpMI->eraseFromParent(); 675 Head->updateTerminator(); 676 677 RemovedBlocks.push_back(CmpBB); 678 CmpBB->eraseFromParent(); 679 DEBUG(dbgs() << "Result:\n" << *Head); 680 ++NumConverted; 681} 682 683int SSACCmpConv::expectedCodeSizeDelta() const { 684 int delta = 0; 685 // If the Head terminator was one of the cbz / tbz branches with built-in 686 // compare, we need to insert an explicit compare instruction in its place 687 // plus a branch instruction. 688 if (HeadCond[0].getImm() == -1) { 689 switch (HeadCond[1].getImm()) { 690 case AArch64::CBZW: 691 case AArch64::CBNZW: 692 case AArch64::CBZX: 693 case AArch64::CBNZX: 694 // Therefore delta += 1 695 delta = 1; 696 break; 697 default: 698 llvm_unreachable("Cannot convert Head branch"); 699 } 700 } 701 // If the Cmp terminator was one of the cbz / tbz branches with 702 // built-in compare, it will be turned into a compare instruction 703 // into Head, but we do not save any instruction. 704 // Otherwise, we save the branch instruction. 705 switch (CmpMI->getOpcode()) { 706 default: 707 --delta; 708 break; 709 case AArch64::CBZW: 710 case AArch64::CBNZW: 711 case AArch64::CBZX: 712 case AArch64::CBNZX: 713 break; 714 } 715 return delta; 716} 717 718//===----------------------------------------------------------------------===// 719// AArch64ConditionalCompares Pass 720//===----------------------------------------------------------------------===// 721 722namespace { 723class AArch64ConditionalCompares : public MachineFunctionPass { 724 const TargetInstrInfo *TII; 725 const TargetRegisterInfo *TRI; 726 const MCSchedModel *SchedModel; 727 // Does the proceeded function has Oz attribute. 728 bool MinSize; 729 MachineRegisterInfo *MRI; 730 MachineDominatorTree *DomTree; 731 MachineLoopInfo *Loops; 732 MachineTraceMetrics *Traces; 733 MachineTraceMetrics::Ensemble *MinInstr; 734 SSACCmpConv CmpConv; 735 736public: 737 static char ID; 738 AArch64ConditionalCompares() : MachineFunctionPass(ID) {} 739 void getAnalysisUsage(AnalysisUsage &AU) const override; 740 bool runOnMachineFunction(MachineFunction &MF) override; 741 const char *getPassName() const override { 742 return "AArch64 Conditional Compares"; 743 } 744 745private: 746 bool tryConvert(MachineBasicBlock *); 747 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed); 748 void updateLoops(ArrayRef<MachineBasicBlock *> Removed); 749 void invalidateTraces(); 750 bool shouldConvert(); 751}; 752} // end anonymous namespace 753 754char AArch64ConditionalCompares::ID = 0; 755 756namespace llvm { 757void initializeAArch64ConditionalComparesPass(PassRegistry &); 758} 759 760INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", 761 "AArch64 CCMP Pass", false, false) 762INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 763INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 764INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) 765INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", 766 "AArch64 CCMP Pass", false, false) 767 768FunctionPass *llvm::createAArch64ConditionalCompares() { 769 return new AArch64ConditionalCompares(); 770} 771 772void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { 773 AU.addRequired<MachineBranchProbabilityInfo>(); 774 AU.addRequired<MachineDominatorTree>(); 775 AU.addPreserved<MachineDominatorTree>(); 776 AU.addRequired<MachineLoopInfo>(); 777 AU.addPreserved<MachineLoopInfo>(); 778 AU.addRequired<MachineTraceMetrics>(); 779 AU.addPreserved<MachineTraceMetrics>(); 780 MachineFunctionPass::getAnalysisUsage(AU); 781} 782 783/// Update the dominator tree after if-conversion erased some blocks. 784void AArch64ConditionalCompares::updateDomTree( 785 ArrayRef<MachineBasicBlock *> Removed) { 786 // convert() removes CmpBB which was previously dominated by Head. 787 // CmpBB children should be transferred to Head. 788 MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); 789 for (unsigned i = 0, e = Removed.size(); i != e; ++i) { 790 MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); 791 assert(Node != HeadNode && "Cannot erase the head node"); 792 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head"); 793 while (Node->getNumChildren()) 794 DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); 795 DomTree->eraseNode(Removed[i]); 796 } 797} 798 799/// Update LoopInfo after if-conversion. 800void 801AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) { 802 if (!Loops) 803 return; 804 for (unsigned i = 0, e = Removed.size(); i != e; ++i) 805 Loops->removeBlock(Removed[i]); 806} 807 808/// Invalidate MachineTraceMetrics before if-conversion. 809void AArch64ConditionalCompares::invalidateTraces() { 810 Traces->invalidate(CmpConv.Head); 811 Traces->invalidate(CmpConv.CmpBB); 812} 813 814/// Apply cost model and heuristics to the if-conversion in IfConv. 815/// Return true if the conversion is a good idea. 816/// 817bool AArch64ConditionalCompares::shouldConvert() { 818 // Stress testing mode disables all cost considerations. 819 if (Stress) 820 return true; 821 if (!MinInstr) 822 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 823 824 // Head dominates CmpBB, so it is always included in its trace. 825 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB); 826 827 // If code size is the main concern 828 if (MinSize) { 829 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta(); 830 DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n'); 831 // If we are minimizing the code size, do the conversion whatever 832 // the cost is. 833 if (CodeSizeDelta < 0) 834 return true; 835 if (CodeSizeDelta > 0) { 836 DEBUG(dbgs() << "Code size is increasing, give up on this one.\n"); 837 return false; 838 } 839 // CodeSizeDelta == 0, continue with the regular heuristics 840 } 841 842 // Heuristic: The compare conversion delays the execution of the branch 843 // instruction because we must wait for the inputs to the second compare as 844 // well. The branch has no dependent instructions, but delaying it increases 845 // the cost of a misprediction. 846 // 847 // Set a limit on the delay we will accept. 848 unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4; 849 850 // Instruction depths can be computed for all trace instructions above CmpBB. 851 unsigned HeadDepth = 852 Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth; 853 unsigned CmpBBDepth = 854 Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth; 855 DEBUG(dbgs() << "Head depth: " << HeadDepth 856 << "\nCmpBB depth: " << CmpBBDepth << '\n'); 857 if (CmpBBDepth > HeadDepth + DelayLimit) { 858 DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit 859 << " cycles.\n"); 860 return false; 861 } 862 863 // Check the resource depth at the bottom of CmpBB - these instructions will 864 // be speculated. 865 unsigned ResDepth = Trace.getResourceDepth(true); 866 DEBUG(dbgs() << "Resources: " << ResDepth << '\n'); 867 868 // Heuristic: The speculatively executed instructions must all be able to 869 // merge into the Head block. The Head critical path should dominate the 870 // resource cost of the speculated instructions. 871 if (ResDepth > HeadDepth) { 872 DEBUG(dbgs() << "Too many instructions to speculate.\n"); 873 return false; 874 } 875 return true; 876} 877 878bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { 879 bool Changed = false; 880 while (CmpConv.canConvert(MBB) && shouldConvert()) { 881 invalidateTraces(); 882 SmallVector<MachineBasicBlock *, 4> RemovedBlocks; 883 CmpConv.convert(RemovedBlocks); 884 Changed = true; 885 updateDomTree(RemovedBlocks); 886 updateLoops(RemovedBlocks); 887 } 888 return Changed; 889} 890 891bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { 892 DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" 893 << "********** Function: " << MF.getName() << '\n'); 894 TII = MF.getTarget().getInstrInfo(); 895 TRI = MF.getTarget().getRegisterInfo(); 896 SchedModel = 897 MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); 898 MRI = &MF.getRegInfo(); 899 DomTree = &getAnalysis<MachineDominatorTree>(); 900 Loops = getAnalysisIfAvailable<MachineLoopInfo>(); 901 Traces = &getAnalysis<MachineTraceMetrics>(); 902 MinInstr = nullptr; 903 MinSize = MF.getFunction()->getAttributes().hasAttribute( 904 AttributeSet::FunctionIndex, Attribute::MinSize); 905 906 bool Changed = false; 907 CmpConv.runOnMachineFunction(MF); 908 909 // Visit blocks in dominator tree pre-order. The pre-order enables multiple 910 // cmp-conversions from the same head block. 911 // Note that updateDomTree() modifies the children of the DomTree node 912 // currently being visited. The df_iterator supports that; it doesn't look at 913 // child_begin() / child_end() until after a node has been visited. 914 for (auto *I : depth_first(DomTree)) 915 if (tryConvert(I->getBlock())) 916 Changed = true; 917 918 return Changed; 919} 920