1//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass looks for safe point where the prologue and epilogue can be 11// inserted. 12// The safe point for the prologue (resp. epilogue) is called Save 13// (resp. Restore). 14// A point is safe for prologue (resp. epilogue) if and only if 15// it 1) dominates (resp. post-dominates) all the frame related operations and 16// between 2) two executions of the Save (resp. Restore) point there is an 17// execution of the Restore (resp. Save) point. 18// 19// For instance, the following points are safe: 20// for (int i = 0; i < 10; ++i) { 21// Save 22// ... 23// Restore 24// } 25// Indeed, the execution looks like Save -> Restore -> Save -> Restore ... 26// And the following points are not: 27// for (int i = 0; i < 10; ++i) { 28// Save 29// ... 30// } 31// for (int i = 0; i < 10; ++i) { 32// ... 33// Restore 34// } 35// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore. 36// 37// This pass also ensures that the safe points are 3) cheaper than the regular 38// entry and exits blocks. 39// 40// Property #1 is ensured via the use of MachineDominatorTree and 41// MachinePostDominatorTree. 42// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both 43// points must be in the same loop. 44// Property #3 is ensured via the MachineBlockFrequencyInfo. 45// 46// If this pass found points matching all these properties, then 47// MachineFrameInfo is updated this that information. 48//===----------------------------------------------------------------------===// 49#include "llvm/ADT/BitVector.h" 50#include "llvm/ADT/SetVector.h" 51#include "llvm/ADT/Statistic.h" 52// To check for profitability. 53#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 54// For property #1 for Save. 55#include "llvm/CodeGen/MachineDominators.h" 56#include "llvm/CodeGen/MachineFunctionPass.h" 57// To record the result of the analysis. 58#include "llvm/CodeGen/MachineFrameInfo.h" 59// For property #2. 60#include "llvm/CodeGen/MachineLoopInfo.h" 61// For property #1 for Restore. 62#include "llvm/CodeGen/MachinePostDominators.h" 63#include "llvm/CodeGen/Passes.h" 64// To know about callee-saved. 65#include "llvm/CodeGen/RegisterClassInfo.h" 66#include "llvm/CodeGen/RegisterScavenging.h" 67#include "llvm/MC/MCAsmInfo.h" 68#include "llvm/Support/Debug.h" 69// To query the target about frame lowering. 70#include "llvm/Target/TargetFrameLowering.h" 71// To know about frame setup operation. 72#include "llvm/Target/TargetInstrInfo.h" 73#include "llvm/Target/TargetMachine.h" 74// To access TargetInstrInfo. 75#include "llvm/Target/TargetSubtargetInfo.h" 76 77#define DEBUG_TYPE "shrink-wrap" 78 79using namespace llvm; 80 81STATISTIC(NumFunc, "Number of functions"); 82STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); 83STATISTIC(NumCandidatesDropped, 84 "Number of shrink-wrapping candidates dropped because of frequency"); 85 86static cl::opt<cl::boolOrDefault> 87 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, 88 cl::desc("enable the shrink-wrapping pass")); 89 90namespace { 91/// \brief Class to determine where the safe point to insert the 92/// prologue and epilogue are. 93/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the 94/// shrink-wrapping term for prologue/epilogue placement, this pass 95/// does not rely on expensive data-flow analysis. Instead we use the 96/// dominance properties and loop information to decide which point 97/// are safe for such insertion. 98class ShrinkWrap : public MachineFunctionPass { 99 /// Hold callee-saved information. 100 RegisterClassInfo RCI; 101 MachineDominatorTree *MDT; 102 MachinePostDominatorTree *MPDT; 103 /// Current safe point found for the prologue. 104 /// The prologue will be inserted before the first instruction 105 /// in this basic block. 106 MachineBasicBlock *Save; 107 /// Current safe point found for the epilogue. 108 /// The epilogue will be inserted before the first terminator instruction 109 /// in this basic block. 110 MachineBasicBlock *Restore; 111 /// Hold the information of the basic block frequency. 112 /// Use to check the profitability of the new points. 113 MachineBlockFrequencyInfo *MBFI; 114 /// Hold the loop information. Used to determine if Save and Restore 115 /// are in the same loop. 116 MachineLoopInfo *MLI; 117 /// Frequency of the Entry block. 118 uint64_t EntryFreq; 119 /// Current opcode for frame setup. 120 unsigned FrameSetupOpcode; 121 /// Current opcode for frame destroy. 122 unsigned FrameDestroyOpcode; 123 /// Entry block. 124 const MachineBasicBlock *Entry; 125 typedef SmallSetVector<unsigned, 16> SetOfRegs; 126 /// Registers that need to be saved for the current function. 127 mutable SetOfRegs CurrentCSRs; 128 /// Current MachineFunction. 129 MachineFunction *MachineFunc; 130 131 /// \brief Check if \p MI uses or defines a callee-saved register or 132 /// a frame index. If this is the case, this means \p MI must happen 133 /// after Save and before Restore. 134 bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; 135 136 const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { 137 if (CurrentCSRs.empty()) { 138 BitVector SavedRegs; 139 const TargetFrameLowering *TFI = 140 MachineFunc->getSubtarget().getFrameLowering(); 141 142 TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); 143 144 for (int Reg = SavedRegs.find_first(); Reg != -1; 145 Reg = SavedRegs.find_next(Reg)) 146 CurrentCSRs.insert((unsigned)Reg); 147 } 148 return CurrentCSRs; 149 } 150 151 /// \brief Update the Save and Restore points such that \p MBB is in 152 /// the region that is dominated by Save and post-dominated by Restore 153 /// and Save and Restore still match the safe point definition. 154 /// Such point may not exist and Save and/or Restore may be null after 155 /// this call. 156 void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); 157 158 /// \brief Initialize the pass for \p MF. 159 void init(MachineFunction &MF) { 160 RCI.runOnMachineFunction(MF); 161 MDT = &getAnalysis<MachineDominatorTree>(); 162 MPDT = &getAnalysis<MachinePostDominatorTree>(); 163 Save = nullptr; 164 Restore = nullptr; 165 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 166 MLI = &getAnalysis<MachineLoopInfo>(); 167 EntryFreq = MBFI->getEntryFreq(); 168 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 169 FrameSetupOpcode = TII.getCallFrameSetupOpcode(); 170 FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); 171 Entry = &MF.front(); 172 CurrentCSRs.clear(); 173 MachineFunc = &MF; 174 175 ++NumFunc; 176 } 177 178 /// Check whether or not Save and Restore points are still interesting for 179 /// shrink-wrapping. 180 bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } 181 182 /// \brief Check if shrink wrapping is enabled for this target and function. 183 static bool isShrinkWrapEnabled(const MachineFunction &MF); 184 185public: 186 static char ID; 187 188 ShrinkWrap() : MachineFunctionPass(ID) { 189 initializeShrinkWrapPass(*PassRegistry::getPassRegistry()); 190 } 191 192 void getAnalysisUsage(AnalysisUsage &AU) const override { 193 AU.setPreservesAll(); 194 AU.addRequired<MachineBlockFrequencyInfo>(); 195 AU.addRequired<MachineDominatorTree>(); 196 AU.addRequired<MachinePostDominatorTree>(); 197 AU.addRequired<MachineLoopInfo>(); 198 MachineFunctionPass::getAnalysisUsage(AU); 199 } 200 201 const char *getPassName() const override { 202 return "Shrink Wrapping analysis"; 203 } 204 205 /// \brief Perform the shrink-wrapping analysis and update 206 /// the MachineFrameInfo attached to \p MF with the results. 207 bool runOnMachineFunction(MachineFunction &MF) override; 208}; 209} // End anonymous namespace. 210 211char ShrinkWrap::ID = 0; 212char &llvm::ShrinkWrapID = ShrinkWrap::ID; 213 214INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, 215 false) 216INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) 217INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 218INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 219INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 220INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) 221 222bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, 223 RegScavenger *RS) const { 224 if (MI.getOpcode() == FrameSetupOpcode || 225 MI.getOpcode() == FrameDestroyOpcode) { 226 DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); 227 return true; 228 } 229 for (const MachineOperand &MO : MI.operands()) { 230 bool UseOrDefCSR = false; 231 if (MO.isReg()) { 232 unsigned PhysReg = MO.getReg(); 233 if (!PhysReg) 234 continue; 235 assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && 236 "Unallocated register?!"); 237 UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); 238 } else if (MO.isRegMask()) { 239 // Check if this regmask clobbers any of the CSRs. 240 for (unsigned Reg : getCurrentCSRs(RS)) { 241 if (MO.clobbersPhysReg(Reg)) { 242 UseOrDefCSR = true; 243 break; 244 } 245 } 246 } 247 if (UseOrDefCSR || MO.isFI()) { 248 DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" 249 << MO.isFI() << "): " << MI << '\n'); 250 return true; 251 } 252 } 253 return false; 254} 255 256/// \brief Helper function to find the immediate (post) dominator. 257template <typename ListOfBBs, typename DominanceAnalysis> 258MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, 259 DominanceAnalysis &Dom) { 260 MachineBasicBlock *IDom = &Block; 261 for (MachineBasicBlock *BB : BBs) { 262 IDom = Dom.findNearestCommonDominator(IDom, BB); 263 if (!IDom) 264 break; 265 } 266 return IDom; 267} 268 269void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, 270 RegScavenger *RS) { 271 // Get rid of the easy cases first. 272 if (!Save) 273 Save = &MBB; 274 else 275 Save = MDT->findNearestCommonDominator(Save, &MBB); 276 277 if (!Save) { 278 DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); 279 return; 280 } 281 282 if (!Restore) 283 Restore = &MBB; 284 else 285 Restore = MPDT->findNearestCommonDominator(Restore, &MBB); 286 287 // Make sure we would be able to insert the restore code before the 288 // terminator. 289 if (Restore == &MBB) { 290 for (const MachineInstr &Terminator : MBB.terminators()) { 291 if (!useOrDefCSROrFI(Terminator, RS)) 292 continue; 293 // One of the terminator needs to happen before the restore point. 294 if (MBB.succ_empty()) { 295 Restore = nullptr; 296 break; 297 } 298 // Look for a restore point that post-dominates all the successors. 299 // The immediate post-dominator is what we are looking for. 300 Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); 301 break; 302 } 303 } 304 305 if (!Restore) { 306 DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); 307 return; 308 } 309 310 // Make sure Save and Restore are suitable for shrink-wrapping: 311 // 1. all path from Save needs to lead to Restore before exiting. 312 // 2. all path to Restore needs to go through Save from Entry. 313 // We achieve that by making sure that: 314 // A. Save dominates Restore. 315 // B. Restore post-dominates Save. 316 // C. Save and Restore are in the same loop. 317 bool SaveDominatesRestore = false; 318 bool RestorePostDominatesSave = false; 319 while (Save && Restore && 320 (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || 321 !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || 322 // Post-dominance is not enough in loops to ensure that all uses/defs 323 // are after the prologue and before the epilogue at runtime. 324 // E.g., 325 // while(1) { 326 // Save 327 // Restore 328 // if (...) 329 // break; 330 // use/def CSRs 331 // } 332 // All the uses/defs of CSRs are dominated by Save and post-dominated 333 // by Restore. However, the CSRs uses are still reachable after 334 // Restore and before Save are executed. 335 // 336 // For now, just push the restore/save points outside of loops. 337 // FIXME: Refine the criteria to still find interesting cases 338 // for loops. 339 MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { 340 // Fix (A). 341 if (!SaveDominatesRestore) { 342 Save = MDT->findNearestCommonDominator(Save, Restore); 343 continue; 344 } 345 // Fix (B). 346 if (!RestorePostDominatesSave) 347 Restore = MPDT->findNearestCommonDominator(Restore, Save); 348 349 // Fix (C). 350 if (Save && Restore && 351 (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { 352 if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { 353 // Push Save outside of this loop if immediate dominator is different 354 // from save block. If immediate dominator is not different, bail out. 355 MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); 356 if (IDom != Save) 357 Save = IDom; 358 else { 359 Save = nullptr; 360 break; 361 } 362 } else { 363 // If the loop does not exit, there is no point in looking 364 // for a post-dominator outside the loop. 365 SmallVector<MachineBasicBlock*, 4> ExitBlocks; 366 MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks); 367 // Push Restore outside of this loop. 368 // Look for the immediate post-dominator of the loop exits. 369 MachineBasicBlock *IPdom = Restore; 370 for (MachineBasicBlock *LoopExitBB: ExitBlocks) { 371 IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT); 372 if (!IPdom) 373 break; 374 } 375 // If the immediate post-dominator is not in a less nested loop, 376 // then we are stuck in a program with an infinite loop. 377 // In that case, we will not find a safe point, hence, bail out. 378 if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) 379 Restore = IPdom; 380 else { 381 Restore = nullptr; 382 break; 383 } 384 } 385 } 386 } 387} 388 389bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { 390 if (MF.empty() || !isShrinkWrapEnabled(MF)) 391 return false; 392 393 DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); 394 395 init(MF); 396 397 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 398 std::unique_ptr<RegScavenger> RS( 399 TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); 400 401 for (MachineBasicBlock &MBB : MF) { 402 DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() 403 << '\n'); 404 405 if (MBB.isEHFuncletEntry()) { 406 DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); 407 return false; 408 } 409 410 for (const MachineInstr &MI : MBB) { 411 if (!useOrDefCSROrFI(MI, RS.get())) 412 continue; 413 // Save (resp. restore) point must dominate (resp. post dominate) 414 // MI. Look for the proper basic block for those. 415 updateSaveRestorePoints(MBB, RS.get()); 416 // If we are at a point where we cannot improve the placement of 417 // save/restore instructions, just give up. 418 if (!ArePointsInteresting()) { 419 DEBUG(dbgs() << "No Shrink wrap candidate found\n"); 420 return false; 421 } 422 // No need to look for other instructions, this basic block 423 // will already be part of the handled region. 424 break; 425 } 426 } 427 if (!ArePointsInteresting()) { 428 // If the points are not interesting at this point, then they must be null 429 // because it means we did not encounter any frame/CSR related code. 430 // Otherwise, we would have returned from the previous loop. 431 assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); 432 DEBUG(dbgs() << "Nothing to shrink-wrap\n"); 433 return false; 434 } 435 436 DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq 437 << '\n'); 438 439 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 440 do { 441 DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " 442 << Save->getNumber() << ' ' << Save->getName() << ' ' 443 << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " 444 << Restore->getNumber() << ' ' << Restore->getName() << ' ' 445 << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); 446 447 bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; 448 if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && 449 EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) && 450 ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && 451 TFI->canUseAsEpilogue(*Restore))) 452 break; 453 DEBUG(dbgs() << "New points are too expensive or invalid for the target\n"); 454 MachineBasicBlock *NewBB; 455 if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { 456 Save = FindIDom<>(*Save, Save->predecessors(), *MDT); 457 if (!Save) 458 break; 459 NewBB = Save; 460 } else { 461 // Restore is expensive. 462 Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); 463 if (!Restore) 464 break; 465 NewBB = Restore; 466 } 467 updateSaveRestorePoints(*NewBB, RS.get()); 468 } while (Save && Restore); 469 470 if (!ArePointsInteresting()) { 471 ++NumCandidatesDropped; 472 return false; 473 } 474 475 DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() 476 << ' ' << Save->getName() << "\nRestore: " 477 << Restore->getNumber() << ' ' << Restore->getName() << '\n'); 478 479 MachineFrameInfo *MFI = MF.getFrameInfo(); 480 MFI->setSavePoint(Save); 481 MFI->setRestorePoint(Restore); 482 ++NumCandidates; 483 return false; 484} 485 486bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { 487 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 488 489 switch (EnableShrinkWrapOpt) { 490 case cl::BOU_UNSET: 491 return TFI->enableShrinkWrapping(MF) && 492 // Windows with CFI has some limitations that make it impossible 493 // to use shrink-wrapping. 494 !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && 495 // Sanitizers look at the value of the stack at the location 496 // of the crash. Since a crash can happen anywhere, the 497 // frame must be lowered before anything else happen for the 498 // sanitizers to be able to get a correct stack frame. 499 !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || 500 MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) || 501 MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory)); 502 // If EnableShrinkWrap is set, it takes precedence on whatever the 503 // target sets. The rational is that we assume we want to test 504 // something related to shrink-wrapping. 505 case cl::BOU_TRUE: 506 return true; 507 case cl::BOU_FALSE: 508 return false; 509 } 510 llvm_unreachable("Invalid shrink-wrapping state"); 511} 512