1//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the PPC implementation of TargetFrameLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "PPCFrameLowering.h" 15#include "PPCInstrBuilder.h" 16#include "PPCInstrInfo.h" 17#include "PPCMachineFunctionInfo.h" 18#include "PPCSubtarget.h" 19#include "PPCTargetMachine.h" 20#include "llvm/CodeGen/MachineFrameInfo.h" 21#include "llvm/CodeGen/MachineFunction.h" 22#include "llvm/CodeGen/MachineInstrBuilder.h" 23#include "llvm/CodeGen/MachineModuleInfo.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25#include "llvm/CodeGen/RegisterScavenging.h" 26#include "llvm/IR/Function.h" 27#include "llvm/Target/TargetOptions.h" 28 29using namespace llvm; 30 31/// VRRegNo - Map from a numbered VR register to its enum value. 32/// 33static const MCPhysReg VRRegNo[] = { 34 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 35 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 36 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 37 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 38}; 39 40static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 41 if (STI.isDarwinABI()) 42 return STI.isPPC64() ? 16 : 8; 43 // SVR4 ABI: 44 return STI.isPPC64() ? 16 : 4; 45} 46 47static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 48 return STI.isELFv2ABI() ? 24 : 40; 49} 50 51static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 52 // For the Darwin ABI: 53 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area 54 // for saving the frame pointer (if needed.) While the published ABI has 55 // not used this slot since at least MacOSX 10.2, there is older code 56 // around that does use it, and that needs to continue to work. 57 if (STI.isDarwinABI()) 58 return STI.isPPC64() ? -8U : -4U; 59 60 // SVR4 ABI: First slot in the general register save area. 61 return STI.isPPC64() ? -8U : -4U; 62} 63 64static unsigned computeLinkageSize(const PPCSubtarget &STI) { 65 if (STI.isDarwinABI() || STI.isPPC64()) 66 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 67 68 // SVR4 ABI: 69 return 8; 70} 71 72static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 73 if (STI.isDarwinABI()) 74 return STI.isPPC64() ? -16U : -8U; 75 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80} 81 82PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 83 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 84 STI.getPlatformStackAlignment(), 0), 85 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 86 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 87 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 88 LinkageSize(computeLinkageSize(Subtarget)), 89 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} 90 91// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 92const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 93 unsigned &NumEntries) const { 94 if (Subtarget.isDarwinABI()) { 95 NumEntries = 1; 96 if (Subtarget.isPPC64()) { 97 static const SpillSlot darwin64Offsets = {PPC::X31, -8}; 98 return &darwin64Offsets; 99 } else { 100 static const SpillSlot darwinOffsets = {PPC::R31, -4}; 101 return &darwinOffsets; 102 } 103 } 104 105 // Early exit if not using the SVR4 ABI. 106 if (!Subtarget.isSVR4ABI()) { 107 NumEntries = 0; 108 return nullptr; 109 } 110 111 // Note that the offsets here overlap, but this is fixed up in 112 // processFunctionBeforeFrameFinalized. 113 114 static const SpillSlot Offsets[] = { 115 // Floating-point register save area offsets. 116 {PPC::F31, -8}, 117 {PPC::F30, -16}, 118 {PPC::F29, -24}, 119 {PPC::F28, -32}, 120 {PPC::F27, -40}, 121 {PPC::F26, -48}, 122 {PPC::F25, -56}, 123 {PPC::F24, -64}, 124 {PPC::F23, -72}, 125 {PPC::F22, -80}, 126 {PPC::F21, -88}, 127 {PPC::F20, -96}, 128 {PPC::F19, -104}, 129 {PPC::F18, -112}, 130 {PPC::F17, -120}, 131 {PPC::F16, -128}, 132 {PPC::F15, -136}, 133 {PPC::F14, -144}, 134 135 // General register save area offsets. 136 {PPC::R31, -4}, 137 {PPC::R30, -8}, 138 {PPC::R29, -12}, 139 {PPC::R28, -16}, 140 {PPC::R27, -20}, 141 {PPC::R26, -24}, 142 {PPC::R25, -28}, 143 {PPC::R24, -32}, 144 {PPC::R23, -36}, 145 {PPC::R22, -40}, 146 {PPC::R21, -44}, 147 {PPC::R20, -48}, 148 {PPC::R19, -52}, 149 {PPC::R18, -56}, 150 {PPC::R17, -60}, 151 {PPC::R16, -64}, 152 {PPC::R15, -68}, 153 {PPC::R14, -72}, 154 155 // CR save area offset. We map each of the nonvolatile CR fields 156 // to the slot for CR2, which is the first of the nonvolatile CR 157 // fields to be assigned, so that we only allocate one save slot. 158 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 159 {PPC::CR2, -4}, 160 161 // VRSAVE save area offset. 162 {PPC::VRSAVE, -4}, 163 164 // Vector register save area 165 {PPC::V31, -16}, 166 {PPC::V30, -32}, 167 {PPC::V29, -48}, 168 {PPC::V28, -64}, 169 {PPC::V27, -80}, 170 {PPC::V26, -96}, 171 {PPC::V25, -112}, 172 {PPC::V24, -128}, 173 {PPC::V23, -144}, 174 {PPC::V22, -160}, 175 {PPC::V21, -176}, 176 {PPC::V20, -192}}; 177 178 static const SpillSlot Offsets64[] = { 179 // Floating-point register save area offsets. 180 {PPC::F31, -8}, 181 {PPC::F30, -16}, 182 {PPC::F29, -24}, 183 {PPC::F28, -32}, 184 {PPC::F27, -40}, 185 {PPC::F26, -48}, 186 {PPC::F25, -56}, 187 {PPC::F24, -64}, 188 {PPC::F23, -72}, 189 {PPC::F22, -80}, 190 {PPC::F21, -88}, 191 {PPC::F20, -96}, 192 {PPC::F19, -104}, 193 {PPC::F18, -112}, 194 {PPC::F17, -120}, 195 {PPC::F16, -128}, 196 {PPC::F15, -136}, 197 {PPC::F14, -144}, 198 199 // General register save area offsets. 200 {PPC::X31, -8}, 201 {PPC::X30, -16}, 202 {PPC::X29, -24}, 203 {PPC::X28, -32}, 204 {PPC::X27, -40}, 205 {PPC::X26, -48}, 206 {PPC::X25, -56}, 207 {PPC::X24, -64}, 208 {PPC::X23, -72}, 209 {PPC::X22, -80}, 210 {PPC::X21, -88}, 211 {PPC::X20, -96}, 212 {PPC::X19, -104}, 213 {PPC::X18, -112}, 214 {PPC::X17, -120}, 215 {PPC::X16, -128}, 216 {PPC::X15, -136}, 217 {PPC::X14, -144}, 218 219 // VRSAVE save area offset. 220 {PPC::VRSAVE, -4}, 221 222 // Vector register save area 223 {PPC::V31, -16}, 224 {PPC::V30, -32}, 225 {PPC::V29, -48}, 226 {PPC::V28, -64}, 227 {PPC::V27, -80}, 228 {PPC::V26, -96}, 229 {PPC::V25, -112}, 230 {PPC::V24, -128}, 231 {PPC::V23, -144}, 232 {PPC::V22, -160}, 233 {PPC::V21, -176}, 234 {PPC::V20, -192}}; 235 236 if (Subtarget.isPPC64()) { 237 NumEntries = array_lengthof(Offsets64); 238 239 return Offsets64; 240 } else { 241 NumEntries = array_lengthof(Offsets); 242 243 return Offsets; 244 } 245} 246 247/// RemoveVRSaveCode - We have found that this function does not need any code 248/// to manipulate the VRSAVE register, even though it uses vector registers. 249/// This can happen when the only registers used are known to be live in or out 250/// of the function. Remove all of the VRSAVE related code from the function. 251/// FIXME: The removal of the code results in a compile failure at -O0 when the 252/// function contains a function call, as the GPR containing original VRSAVE 253/// contents is spilled and reloaded around the call. Without the prolog code, 254/// the spill instruction refers to an undefined register. This code needs 255/// to account for all uses of that GPR. 256static void RemoveVRSaveCode(MachineInstr *MI) { 257 MachineBasicBlock *Entry = MI->getParent(); 258 MachineFunction *MF = Entry->getParent(); 259 260 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 261 MachineBasicBlock::iterator MBBI = MI; 262 ++MBBI; 263 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 264 MBBI->eraseFromParent(); 265 266 bool RemovedAllMTVRSAVEs = true; 267 // See if we can find and remove the MTVRSAVE instruction from all of the 268 // epilog blocks. 269 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 270 // If last instruction is a return instruction, add an epilogue 271 if (I->isReturnBlock()) { 272 bool FoundIt = false; 273 for (MBBI = I->end(); MBBI != I->begin(); ) { 274 --MBBI; 275 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 276 MBBI->eraseFromParent(); // remove it. 277 FoundIt = true; 278 break; 279 } 280 } 281 RemovedAllMTVRSAVEs &= FoundIt; 282 } 283 } 284 285 // If we found and removed all MTVRSAVE instructions, remove the read of 286 // VRSAVE as well. 287 if (RemovedAllMTVRSAVEs) { 288 MBBI = MI; 289 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 290 --MBBI; 291 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 292 MBBI->eraseFromParent(); 293 } 294 295 // Finally, nuke the UPDATE_VRSAVE. 296 MI->eraseFromParent(); 297} 298 299// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 300// instruction selector. Based on the vector registers that have been used, 301// transform this into the appropriate ORI instruction. 302static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { 303 MachineFunction *MF = MI->getParent()->getParent(); 304 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 305 DebugLoc dl = MI->getDebugLoc(); 306 307 const MachineRegisterInfo &MRI = MF->getRegInfo(); 308 unsigned UsedRegMask = 0; 309 for (unsigned i = 0; i != 32; ++i) 310 if (MRI.isPhysRegModified(VRRegNo[i])) 311 UsedRegMask |= 1 << (31-i); 312 313 // Live in and live out values already must be in the mask, so don't bother 314 // marking them. 315 for (MachineRegisterInfo::livein_iterator 316 I = MF->getRegInfo().livein_begin(), 317 E = MF->getRegInfo().livein_end(); I != E; ++I) { 318 unsigned RegNo = TRI->getEncodingValue(I->first); 319 if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. 320 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 321 } 322 323 // Live out registers appear as use operands on return instructions. 324 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 325 UsedRegMask != 0 && BI != BE; ++BI) { 326 const MachineBasicBlock &MBB = *BI; 327 if (!MBB.isReturnBlock()) 328 continue; 329 const MachineInstr &Ret = MBB.back(); 330 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 331 const MachineOperand &MO = Ret.getOperand(I); 332 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 333 continue; 334 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 335 UsedRegMask &= ~(1 << (31-RegNo)); 336 } 337 } 338 339 // If no registers are used, turn this into a copy. 340 if (UsedRegMask == 0) { 341 // Remove all VRSAVE code. 342 RemoveVRSaveCode(MI); 343 return; 344 } 345 346 unsigned SrcReg = MI->getOperand(1).getReg(); 347 unsigned DstReg = MI->getOperand(0).getReg(); 348 349 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 350 if (DstReg != SrcReg) 351 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 352 .addReg(SrcReg) 353 .addImm(UsedRegMask); 354 else 355 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 356 .addReg(SrcReg, RegState::Kill) 357 .addImm(UsedRegMask); 358 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 359 if (DstReg != SrcReg) 360 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 361 .addReg(SrcReg) 362 .addImm(UsedRegMask >> 16); 363 else 364 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 365 .addReg(SrcReg, RegState::Kill) 366 .addImm(UsedRegMask >> 16); 367 } else { 368 if (DstReg != SrcReg) 369 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 370 .addReg(SrcReg) 371 .addImm(UsedRegMask >> 16); 372 else 373 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 374 .addReg(SrcReg, RegState::Kill) 375 .addImm(UsedRegMask >> 16); 376 377 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 378 .addReg(DstReg, RegState::Kill) 379 .addImm(UsedRegMask & 0xFFFF); 380 } 381 382 // Remove the old UPDATE_VRSAVE instruction. 383 MI->eraseFromParent(); 384} 385 386static bool spillsCR(const MachineFunction &MF) { 387 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 388 return FuncInfo->isCRSpilled(); 389} 390 391static bool spillsVRSAVE(const MachineFunction &MF) { 392 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 393 return FuncInfo->isVRSAVESpilled(); 394} 395 396static bool hasSpills(const MachineFunction &MF) { 397 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 398 return FuncInfo->hasSpills(); 399} 400 401static bool hasNonRISpills(const MachineFunction &MF) { 402 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 403 return FuncInfo->hasNonRISpills(); 404} 405 406/// MustSaveLR - Return true if this function requires that we save the LR 407/// register onto the stack in the prolog and restore it in the epilog of the 408/// function. 409static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 410 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 411 412 // We need a save/restore of LR if there is any def of LR (which is 413 // defined by calls, including the PIC setup sequence), or if there is 414 // some use of the LR stack slot (e.g. for builtin_return_address). 415 // (LR comes in 32 and 64 bit versions.) 416 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 417 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 418} 419 420/// determineFrameLayout - Determine the size of the frame and maximum call 421/// frame size. 422unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, 423 bool UpdateMF, 424 bool UseEstimate) const { 425 MachineFrameInfo *MFI = MF.getFrameInfo(); 426 427 // Get the number of bytes to allocate from the FrameInfo 428 unsigned FrameSize = 429 UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); 430 431 // Get stack alignments. The frame must be aligned to the greatest of these: 432 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 433 unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame 434 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 435 436 const PPCRegisterInfo *RegInfo = 437 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 438 439 // If we are a leaf function, and use up to 224 bytes of stack space, 440 // don't have a frame pointer, calls, or dynamic alloca then we do not need 441 // to adjust the stack pointer (we fit in the Red Zone). 442 // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate 443 // stackless code if all local vars are reg-allocated. 444 bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); 445 unsigned LR = RegInfo->getRARegister(); 446 if (!DisableRedZone && 447 (Subtarget.isPPC64() || // 32-bit SVR4, no stack- 448 !Subtarget.isSVR4ABI() || // allocated locals. 449 FrameSize == 0) && 450 FrameSize <= 224 && // Fits in red zone. 451 !MFI->hasVarSizedObjects() && // No dynamic alloca. 452 !MFI->adjustsStack() && // No calls. 453 !MustSaveLR(MF, LR) && 454 !RegInfo->hasBasePointer(MF)) { // No special alignment. 455 // No need for frame 456 if (UpdateMF) 457 MFI->setStackSize(0); 458 return 0; 459 } 460 461 // Get the maximum call frame size of all the calls. 462 unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); 463 464 // Maximum call frame needs to be at least big enough for linkage area. 465 unsigned minCallFrameSize = getLinkageSize(); 466 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 467 468 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 469 // that allocations will be aligned. 470 if (MFI->hasVarSizedObjects()) 471 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 472 473 // Update maximum call frame size. 474 if (UpdateMF) 475 MFI->setMaxCallFrameSize(maxCallFrameSize); 476 477 // Include call frame size in total. 478 FrameSize += maxCallFrameSize; 479 480 // Make sure the frame is aligned. 481 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 482 483 // Update frame info. 484 if (UpdateMF) 485 MFI->setStackSize(FrameSize); 486 487 return FrameSize; 488} 489 490// hasFP - Return true if the specified function actually has a dedicated frame 491// pointer register. 492bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 493 const MachineFrameInfo *MFI = MF.getFrameInfo(); 494 // FIXME: This is pretty much broken by design: hasFP() might be called really 495 // early, before the stack layout was calculated and thus hasFP() might return 496 // true or false here depending on the time of call. 497 return (MFI->getStackSize()) && needsFP(MF); 498} 499 500// needsFP - Return true if the specified function should have a dedicated frame 501// pointer register. This is true if the function has variable sized allocas or 502// if frame pointer elimination is disabled. 503bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 504 const MachineFrameInfo *MFI = MF.getFrameInfo(); 505 506 // Naked functions have no stack frame pushed, so we don't have a frame 507 // pointer. 508 if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) 509 return false; 510 511 return MF.getTarget().Options.DisableFramePointerElim(MF) || 512 MFI->hasVarSizedObjects() || 513 MFI->hasStackMap() || MFI->hasPatchPoint() || 514 (MF.getTarget().Options.GuaranteedTailCallOpt && 515 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 516} 517 518void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 519 bool is31 = needsFP(MF); 520 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 521 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 522 523 const PPCRegisterInfo *RegInfo = 524 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 525 bool HasBP = RegInfo->hasBasePointer(MF); 526 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 527 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; 528 529 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 530 BI != BE; ++BI) 531 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 532 --MBBI; 533 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 534 MachineOperand &MO = MBBI->getOperand(I); 535 if (!MO.isReg()) 536 continue; 537 538 switch (MO.getReg()) { 539 case PPC::FP: 540 MO.setReg(FPReg); 541 break; 542 case PPC::FP8: 543 MO.setReg(FP8Reg); 544 break; 545 case PPC::BP: 546 MO.setReg(BPReg); 547 break; 548 case PPC::BP8: 549 MO.setReg(BP8Reg); 550 break; 551 552 } 553 } 554 } 555} 556 557/* This function will do the following: 558 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 559 respectively (defaults recommended by the ABI) and return true 560 - If MBB is not an entry block, initialize the register scavenger and look 561 for available registers. 562 - If the defaults (R0/R12) are available, return true 563 - If TwoUniqueRegsRequired is set to true, it looks for two unique 564 registers. Otherwise, look for a single available register. 565 - If the required registers are found, set SR1 and SR2 and return true. 566 - If the required registers are not found, set SR2 or both SR1 and SR2 to 567 PPC::NoRegister and return false. 568 569 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 570 is not set, this function will attempt to find two different registers, but 571 still return true if only one register is available (and set SR1 == SR2). 572*/ 573bool 574PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 575 bool UseAtEnd, 576 bool TwoUniqueRegsRequired, 577 unsigned *SR1, 578 unsigned *SR2) const { 579 RegScavenger RS; 580 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 581 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 582 583 // Set the defaults for the two scratch registers. 584 if (SR1) 585 *SR1 = R0; 586 587 if (SR2) { 588 assert (SR1 && "Asking for the second scratch register but not the first?"); 589 *SR2 = R12; 590 } 591 592 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 593 if ((UseAtEnd && MBB->isReturnBlock()) || 594 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 595 return true; 596 597 RS.enterBasicBlock(*MBB); 598 599 if (UseAtEnd && !MBB->empty()) { 600 // The scratch register will be used at the end of the block, so must 601 // consider all registers used within the block 602 603 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 604 // If no terminator, back iterator up to previous instruction. 605 if (MBBI == MBB->end()) 606 MBBI = std::prev(MBBI); 607 608 if (MBBI != MBB->begin()) 609 RS.forward(MBBI); 610 } 611 612 // If the two registers are available, we're all good. 613 // Note that we only return here if both R0 and R12 are available because 614 // although the function may not require two unique registers, it may benefit 615 // from having two so we should try to provide them. 616 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 617 return true; 618 619 // Get the list of callee-saved registers for the target. 620 const PPCRegisterInfo *RegInfo = 621 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 622 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 623 624 // Get all the available registers in the block. 625 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 626 &PPC::GPRCRegClass); 627 628 // We shouldn't use callee-saved registers as scratch registers as they may be 629 // available when looking for a candidate block for shrink wrapping but not 630 // available when the actual prologue/epilogue is being emitted because they 631 // were added as live-in to the prologue block by PrologueEpilogueInserter. 632 for (int i = 0; CSRegs[i]; ++i) 633 BV.reset(CSRegs[i]); 634 635 // Set the first scratch register to the first available one. 636 if (SR1) { 637 int FirstScratchReg = BV.find_first(); 638 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 639 } 640 641 // If there is another one available, set the second scratch register to that. 642 // Otherwise, set it to either PPC::NoRegister if this function requires two 643 // or to whatever SR1 is set to if this function doesn't require two. 644 if (SR2) { 645 int SecondScratchReg = BV.find_next(*SR1); 646 if (SecondScratchReg != -1) 647 *SR2 = SecondScratchReg; 648 else 649 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 650 } 651 652 // Now that we've done our best to provide both registers, double check 653 // whether we were unable to provide enough. 654 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 655 return false; 656 657 return true; 658} 659 660// We need a scratch register for spilling LR and for spilling CR. By default, 661// we use two scratch registers to hide latency. However, if only one scratch 662// register is available, we can adjust for that by not overlapping the spill 663// code. However, if we need to realign the stack (i.e. have a base pointer) 664// and the stack frame is large, we need two scratch registers. 665bool 666PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 667 const PPCRegisterInfo *RegInfo = 668 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 669 MachineFunction &MF = *(MBB->getParent()); 670 bool HasBP = RegInfo->hasBasePointer(MF); 671 unsigned FrameSize = determineFrameLayout(MF, false); 672 int NegFrameSize = -FrameSize; 673 bool IsLargeFrame = !isInt<16>(NegFrameSize); 674 MachineFrameInfo *MFI = MF.getFrameInfo(); 675 unsigned MaxAlign = MFI->getMaxAlignment(); 676 677 return IsLargeFrame && HasBP && MaxAlign > 1; 678} 679 680bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 681 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 682 683 return findScratchRegister(TmpMBB, false, 684 twoUniqueScratchRegsRequired(TmpMBB)); 685} 686 687bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 688 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 689 690 return findScratchRegister(TmpMBB, true); 691} 692 693void PPCFrameLowering::emitPrologue(MachineFunction &MF, 694 MachineBasicBlock &MBB) const { 695 MachineBasicBlock::iterator MBBI = MBB.begin(); 696 MachineFrameInfo *MFI = MF.getFrameInfo(); 697 const PPCInstrInfo &TII = 698 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 699 const PPCRegisterInfo *RegInfo = 700 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 701 702 MachineModuleInfo &MMI = MF.getMMI(); 703 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 704 DebugLoc dl; 705 bool needsCFI = MMI.hasDebugInfo() || 706 MF.getFunction()->needsUnwindTableEntry(); 707 708 // Get processor type. 709 bool isPPC64 = Subtarget.isPPC64(); 710 // Get the ABI. 711 bool isSVR4ABI = Subtarget.isSVR4ABI(); 712 bool isELFv2ABI = Subtarget.isELFv2ABI(); 713 assert((Subtarget.isDarwinABI() || isSVR4ABI) && 714 "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); 715 716 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 717 // process it. 718 if (!isSVR4ABI) 719 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 720 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 721 HandleVRSaveUpdate(MBBI, TII); 722 break; 723 } 724 } 725 726 // Move MBBI back to the beginning of the prologue block. 727 MBBI = MBB.begin(); 728 729 // Work out frame sizes. 730 unsigned FrameSize = determineFrameLayout(MF); 731 int NegFrameSize = -FrameSize; 732 if (!isInt<32>(NegFrameSize)) 733 llvm_unreachable("Unhandled stack size!"); 734 735 if (MFI->isFrameAddressTaken()) 736 replaceFPWithRealFP(MF); 737 738 // Check if the link register (LR) must be saved. 739 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 740 bool MustSaveLR = FI->mustSaveLR(); 741 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 742 bool MustSaveCR = !MustSaveCRs.empty(); 743 // Do we have a frame pointer and/or base pointer for this function? 744 bool HasFP = hasFP(MF); 745 bool HasBP = RegInfo->hasBasePointer(MF); 746 747 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 748 unsigned BPReg = RegInfo->getBaseRegister(MF); 749 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 750 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 751 unsigned ScratchReg = 0; 752 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 753 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 754 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 755 : PPC::MFLR ); 756 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 757 : PPC::STW ); 758 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 759 : PPC::STWU ); 760 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 761 : PPC::STWUX); 762 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 763 : PPC::LIS ); 764 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 765 : PPC::ORI ); 766 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 767 : PPC::OR ); 768 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 769 : PPC::SUBFC); 770 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 771 : PPC::SUBFIC); 772 773 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 774 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 775 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 776 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 777 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 778 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 779 780 // Using the same bool variable as below to supress compiler warnings. 781 bool SingleScratchReg = 782 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 783 &ScratchReg, &TempReg); 784 assert(SingleScratchReg && 785 "Required number of registers not available in this block"); 786 787 SingleScratchReg = ScratchReg == TempReg; 788 789 int LROffset = getReturnSaveOffset(); 790 791 int FPOffset = 0; 792 if (HasFP) { 793 if (isSVR4ABI) { 794 MachineFrameInfo *FFI = MF.getFrameInfo(); 795 int FPIndex = FI->getFramePointerSaveIndex(); 796 assert(FPIndex && "No Frame Pointer Save Slot!"); 797 FPOffset = FFI->getObjectOffset(FPIndex); 798 } else { 799 FPOffset = getFramePointerSaveOffset(); 800 } 801 } 802 803 int BPOffset = 0; 804 if (HasBP) { 805 if (isSVR4ABI) { 806 MachineFrameInfo *FFI = MF.getFrameInfo(); 807 int BPIndex = FI->getBasePointerSaveIndex(); 808 assert(BPIndex && "No Base Pointer Save Slot!"); 809 BPOffset = FFI->getObjectOffset(BPIndex); 810 } else { 811 BPOffset = getBasePointerSaveOffset(); 812 } 813 } 814 815 int PBPOffset = 0; 816 if (FI->usesPICBase()) { 817 MachineFrameInfo *FFI = MF.getFrameInfo(); 818 int PBPIndex = FI->getPICBasePointerSaveIndex(); 819 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 820 PBPOffset = FFI->getObjectOffset(PBPIndex); 821 } 822 823 // Get stack alignments. 824 unsigned MaxAlign = MFI->getMaxAlignment(); 825 if (HasBP && MaxAlign > 1) 826 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 827 "Invalid alignment!"); 828 829 // Frames of 32KB & larger require special handling because they cannot be 830 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 831 bool isLargeFrame = !isInt<16>(NegFrameSize); 832 833 assert((isPPC64 || !MustSaveCR) && 834 "Prologue CR saving supported only in 64-bit mode"); 835 836 // If we need to spill the CR and the LR but we don't have two separate 837 // registers available, we must spill them one at a time 838 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 839 // In the ELFv2 ABI, we are not required to save all CR fields. 840 // If only one or two CR fields are clobbered, it is more efficient to use 841 // mfocrf to selectively save just those fields, because mfocrf has short 842 // latency compares to mfcr. 843 unsigned MfcrOpcode = PPC::MFCR8; 844 unsigned CrState = RegState::ImplicitKill; 845 if (isELFv2ABI && MustSaveCRs.size() == 1) { 846 MfcrOpcode = PPC::MFOCRF8; 847 CrState = RegState::Kill; 848 } 849 MachineInstrBuilder MIB = 850 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 851 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 852 MIB.addReg(MustSaveCRs[i], CrState); 853 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 854 .addReg(TempReg, getKillRegState(true)) 855 .addImm(8) 856 .addReg(SPReg); 857 } 858 859 if (MustSaveLR) 860 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 861 862 if (MustSaveCR && 863 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 864 // In the ELFv2 ABI, we are not required to save all CR fields. 865 // If only one or two CR fields are clobbered, it is more efficient to use 866 // mfocrf to selectively save just those fields, because mfocrf has short 867 // latency compares to mfcr. 868 unsigned MfcrOpcode = PPC::MFCR8; 869 unsigned CrState = RegState::ImplicitKill; 870 if (isELFv2ABI && MustSaveCRs.size() == 1) { 871 MfcrOpcode = PPC::MFOCRF8; 872 CrState = RegState::Kill; 873 } 874 MachineInstrBuilder MIB = 875 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 876 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 877 MIB.addReg(MustSaveCRs[i], CrState); 878 } 879 880 if (HasFP) 881 // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. 882 BuildMI(MBB, MBBI, dl, StoreInst) 883 .addReg(FPReg) 884 .addImm(FPOffset) 885 .addReg(SPReg); 886 887 if (FI->usesPICBase()) 888 // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. 889 BuildMI(MBB, MBBI, dl, StoreInst) 890 .addReg(PPC::R30) 891 .addImm(PBPOffset) 892 .addReg(SPReg); 893 894 if (HasBP) 895 // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. 896 BuildMI(MBB, MBBI, dl, StoreInst) 897 .addReg(BPReg) 898 .addImm(BPOffset) 899 .addReg(SPReg); 900 901 if (MustSaveLR) 902 // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. 903 BuildMI(MBB, MBBI, dl, StoreInst) 904 .addReg(ScratchReg) 905 .addImm(LROffset) 906 .addReg(SPReg); 907 908 if (MustSaveCR && 909 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 910 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 911 .addReg(TempReg, getKillRegState(true)) 912 .addImm(8) 913 .addReg(SPReg); 914 915 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 916 if (!FrameSize) return; 917 918 // Adjust stack pointer: r1 += NegFrameSize. 919 // If there is a preferred stack alignment, align R1 now 920 921 if (HasBP) { 922 // Save a copy of r1 as the base pointer. 923 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 924 .addReg(SPReg) 925 .addReg(SPReg); 926 } 927 928 // This condition must be kept in sync with canUseAsPrologue. 929 if (HasBP && MaxAlign > 1) { 930 if (isPPC64) 931 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 932 .addReg(SPReg) 933 .addImm(0) 934 .addImm(64 - Log2_32(MaxAlign)); 935 else // PPC32... 936 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 937 .addReg(SPReg) 938 .addImm(0) 939 .addImm(32 - Log2_32(MaxAlign)) 940 .addImm(31); 941 if (!isLargeFrame) { 942 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 943 .addReg(ScratchReg, RegState::Kill) 944 .addImm(NegFrameSize); 945 } else { 946 assert(!SingleScratchReg && "Only a single scratch reg available"); 947 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 948 .addImm(NegFrameSize >> 16); 949 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 950 .addReg(TempReg, RegState::Kill) 951 .addImm(NegFrameSize & 0xFFFF); 952 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 953 .addReg(ScratchReg, RegState::Kill) 954 .addReg(TempReg, RegState::Kill); 955 } 956 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 957 .addReg(SPReg, RegState::Kill) 958 .addReg(SPReg) 959 .addReg(ScratchReg); 960 961 } else if (!isLargeFrame) { 962 BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) 963 .addReg(SPReg) 964 .addImm(NegFrameSize) 965 .addReg(SPReg); 966 967 } else { 968 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 969 .addImm(NegFrameSize >> 16); 970 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 971 .addReg(ScratchReg, RegState::Kill) 972 .addImm(NegFrameSize & 0xFFFF); 973 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 974 .addReg(SPReg, RegState::Kill) 975 .addReg(SPReg) 976 .addReg(ScratchReg); 977 } 978 979 // Add Call Frame Information for the instructions we generated above. 980 if (needsCFI) { 981 unsigned CFIIndex; 982 983 if (HasBP) { 984 // Define CFA in terms of BP. Do this in preference to using FP/SP, 985 // because if the stack needed aligning then CFA won't be at a fixed 986 // offset from FP/SP. 987 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 988 CFIIndex = MMI.addFrameInst( 989 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 990 } else { 991 // Adjust the definition of CFA to account for the change in SP. 992 assert(NegFrameSize); 993 CFIIndex = MMI.addFrameInst( 994 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 995 } 996 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 997 .addCFIIndex(CFIIndex); 998 999 if (HasFP) { 1000 // Describe where FP was saved, at a fixed offset from CFA. 1001 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1002 CFIIndex = MMI.addFrameInst( 1003 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1004 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1005 .addCFIIndex(CFIIndex); 1006 } 1007 1008 if (FI->usesPICBase()) { 1009 // Describe where FP was saved, at a fixed offset from CFA. 1010 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1011 CFIIndex = MMI.addFrameInst( 1012 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1013 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1014 .addCFIIndex(CFIIndex); 1015 } 1016 1017 if (HasBP) { 1018 // Describe where BP was saved, at a fixed offset from CFA. 1019 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1020 CFIIndex = MMI.addFrameInst( 1021 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1022 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1023 .addCFIIndex(CFIIndex); 1024 } 1025 1026 if (MustSaveLR) { 1027 // Describe where LR was saved, at a fixed offset from CFA. 1028 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1029 CFIIndex = MMI.addFrameInst( 1030 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1031 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1032 .addCFIIndex(CFIIndex); 1033 } 1034 } 1035 1036 // If there is a frame pointer, copy R1 into R31 1037 if (HasFP) { 1038 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1039 .addReg(SPReg) 1040 .addReg(SPReg); 1041 1042 if (!HasBP && needsCFI) { 1043 // Change the definition of CFA from SP+offset to FP+offset, because SP 1044 // will change at every alloca. 1045 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1046 unsigned CFIIndex = MMI.addFrameInst( 1047 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1048 1049 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1050 .addCFIIndex(CFIIndex); 1051 } 1052 } 1053 1054 if (needsCFI) { 1055 // Describe where callee saved registers were saved, at fixed offsets from 1056 // CFA. 1057 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 1058 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1059 unsigned Reg = CSI[I].getReg(); 1060 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1061 1062 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1063 // subregisters of CR2. We just need to emit a move of CR2. 1064 if (PPC::CRBITRCRegClass.contains(Reg)) 1065 continue; 1066 1067 // For SVR4, don't emit a move for the CR spill slot if we haven't 1068 // spilled CRs. 1069 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1070 && !MustSaveCR) 1071 continue; 1072 1073 // For 64-bit SVR4 when we have spilled CRs, the spill location 1074 // is SP+8, not a frame-relative slot. 1075 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1076 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1077 // the whole CR word. In the ELFv2 ABI, every CR that was 1078 // actually saved gets its own CFI record. 1079 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1080 unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( 1081 nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); 1082 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1083 .addCFIIndex(CFIIndex); 1084 continue; 1085 } 1086 1087 int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); 1088 unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( 1089 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1090 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1091 .addCFIIndex(CFIIndex); 1092 } 1093 } 1094} 1095 1096void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1097 MachineBasicBlock &MBB) const { 1098 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1099 DebugLoc dl; 1100 1101 if (MBBI != MBB.end()) 1102 dl = MBBI->getDebugLoc(); 1103 1104 const PPCInstrInfo &TII = 1105 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1106 const PPCRegisterInfo *RegInfo = 1107 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 1108 1109 // Get alignment info so we know how to restore the SP. 1110 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1111 1112 // Get the number of bytes allocated from the FrameInfo. 1113 int FrameSize = MFI->getStackSize(); 1114 1115 // Get processor type. 1116 bool isPPC64 = Subtarget.isPPC64(); 1117 // Get the ABI. 1118 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1119 1120 // Check if the link register (LR) has been saved. 1121 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1122 bool MustSaveLR = FI->mustSaveLR(); 1123 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1124 bool MustSaveCR = !MustSaveCRs.empty(); 1125 // Do we have a frame pointer and/or base pointer for this function? 1126 bool HasFP = hasFP(MF); 1127 bool HasBP = RegInfo->hasBasePointer(MF); 1128 1129 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1130 unsigned BPReg = RegInfo->getBaseRegister(MF); 1131 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1132 unsigned ScratchReg = 0; 1133 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1134 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1135 : PPC::MTLR ); 1136 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1137 : PPC::LWZ ); 1138 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1139 : PPC::LIS ); 1140 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1141 : PPC::ORI ); 1142 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1143 : PPC::ADDI ); 1144 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1145 : PPC::ADD4 ); 1146 1147 int LROffset = getReturnSaveOffset(); 1148 1149 int FPOffset = 0; 1150 1151 // Using the same bool variable as below to supress compiler warnings. 1152 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1153 &TempReg); 1154 assert(SingleScratchReg && 1155 "Could not find an available scratch register"); 1156 1157 SingleScratchReg = ScratchReg == TempReg; 1158 1159 if (HasFP) { 1160 if (isSVR4ABI) { 1161 MachineFrameInfo *FFI = MF.getFrameInfo(); 1162 int FPIndex = FI->getFramePointerSaveIndex(); 1163 assert(FPIndex && "No Frame Pointer Save Slot!"); 1164 FPOffset = FFI->getObjectOffset(FPIndex); 1165 } else { 1166 FPOffset = getFramePointerSaveOffset(); 1167 } 1168 } 1169 1170 int BPOffset = 0; 1171 if (HasBP) { 1172 if (isSVR4ABI) { 1173 MachineFrameInfo *FFI = MF.getFrameInfo(); 1174 int BPIndex = FI->getBasePointerSaveIndex(); 1175 assert(BPIndex && "No Base Pointer Save Slot!"); 1176 BPOffset = FFI->getObjectOffset(BPIndex); 1177 } else { 1178 BPOffset = getBasePointerSaveOffset(); 1179 } 1180 } 1181 1182 int PBPOffset = 0; 1183 if (FI->usesPICBase()) { 1184 MachineFrameInfo *FFI = MF.getFrameInfo(); 1185 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1186 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1187 PBPOffset = FFI->getObjectOffset(PBPIndex); 1188 } 1189 1190 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1191 1192 if (IsReturnBlock) { 1193 unsigned RetOpcode = MBBI->getOpcode(); 1194 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1195 RetOpcode == PPC::TCRETURNdi || 1196 RetOpcode == PPC::TCRETURNai || 1197 RetOpcode == PPC::TCRETURNri8 || 1198 RetOpcode == PPC::TCRETURNdi8 || 1199 RetOpcode == PPC::TCRETURNai8; 1200 1201 if (UsesTCRet) { 1202 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1203 MachineOperand &StackAdjust = MBBI->getOperand(1); 1204 assert(StackAdjust.isImm() && "Expecting immediate value."); 1205 // Adjust stack pointer. 1206 int StackAdj = StackAdjust.getImm(); 1207 int Delta = StackAdj - MaxTCRetDelta; 1208 assert((Delta >= 0) && "Delta must be positive"); 1209 if (MaxTCRetDelta>0) 1210 FrameSize += (StackAdj +Delta); 1211 else 1212 FrameSize += StackAdj; 1213 } 1214 } 1215 1216 // Frames of 32KB & larger require special handling because they cannot be 1217 // indexed into with a simple LD/LWZ immediate offset operand. 1218 bool isLargeFrame = !isInt<16>(FrameSize); 1219 1220 if (FrameSize) { 1221 // In the prologue, the loaded (or persistent) stack pointer value is offset 1222 // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now. 1223 1224 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1225 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1226 // call which invalidates the stack pointer value in SP(0). So we use the 1227 // value of R31 in this case. 1228 if (FI->hasFastCall()) { 1229 assert(HasFP && "Expecting a valid frame pointer."); 1230 if (!isLargeFrame) { 1231 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1232 .addReg(FPReg).addImm(FrameSize); 1233 } else { 1234 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1235 .addImm(FrameSize >> 16); 1236 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1237 .addReg(ScratchReg, RegState::Kill) 1238 .addImm(FrameSize & 0xFFFF); 1239 BuildMI(MBB, MBBI, dl, AddInst) 1240 .addReg(SPReg) 1241 .addReg(FPReg) 1242 .addReg(ScratchReg); 1243 } 1244 } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) { 1245 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1246 .addReg(SPReg) 1247 .addImm(FrameSize); 1248 } else { 1249 BuildMI(MBB, MBBI, dl, LoadInst, SPReg) 1250 .addImm(0) 1251 .addReg(SPReg); 1252 } 1253 } 1254 1255 assert((isPPC64 || !MustSaveCR) && 1256 "Epilogue CR restoring supported only in 64-bit mode"); 1257 1258 // If we need to save both the LR and the CR and we only have one available 1259 // scratch register, we must do them one at a time. 1260 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1261 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1262 .addImm(8) 1263 .addReg(SPReg); 1264 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1265 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1266 .addReg(TempReg, getKillRegState(i == e-1)); 1267 } 1268 1269 if (MustSaveLR) 1270 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1271 .addImm(LROffset) 1272 .addReg(SPReg); 1273 1274 if (MustSaveCR && 1275 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1276 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1277 .addImm(8) 1278 .addReg(SPReg); 1279 1280 if (HasFP) 1281 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1282 .addImm(FPOffset) 1283 .addReg(SPReg); 1284 1285 if (FI->usesPICBase()) 1286 // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. 1287 BuildMI(MBB, MBBI, dl, LoadInst) 1288 .addReg(PPC::R30) 1289 .addImm(PBPOffset) 1290 .addReg(SPReg); 1291 1292 if (HasBP) 1293 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1294 .addImm(BPOffset) 1295 .addReg(SPReg); 1296 1297 if (MustSaveCR && 1298 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1299 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1300 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1301 .addReg(TempReg, getKillRegState(i == e-1)); 1302 1303 if (MustSaveLR) 1304 BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); 1305 1306 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1307 // call optimization 1308 if (IsReturnBlock) { 1309 unsigned RetOpcode = MBBI->getOpcode(); 1310 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1311 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1312 MF.getFunction()->getCallingConv() == CallingConv::Fast) { 1313 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1314 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1315 1316 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1317 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1318 .addReg(SPReg).addImm(CallerAllocatedAmt); 1319 } else { 1320 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1321 .addImm(CallerAllocatedAmt >> 16); 1322 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1323 .addReg(ScratchReg, RegState::Kill) 1324 .addImm(CallerAllocatedAmt & 0xFFFF); 1325 BuildMI(MBB, MBBI, dl, AddInst) 1326 .addReg(SPReg) 1327 .addReg(FPReg) 1328 .addReg(ScratchReg); 1329 } 1330 } else { 1331 createTailCallBranchInstr(MBB); 1332 } 1333 } 1334} 1335 1336void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1337 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1338 DebugLoc dl; 1339 1340 if (MBBI != MBB.end()) 1341 dl = MBBI->getDebugLoc(); 1342 1343 const PPCInstrInfo &TII = 1344 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1345 1346 // Create branch instruction for pseudo tail call return instruction 1347 unsigned RetOpcode = MBBI->getOpcode(); 1348 if (RetOpcode == PPC::TCRETURNdi) { 1349 MBBI = MBB.getLastNonDebugInstr(); 1350 MachineOperand &JumpTarget = MBBI->getOperand(0); 1351 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1352 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1353 } else if (RetOpcode == PPC::TCRETURNri) { 1354 MBBI = MBB.getLastNonDebugInstr(); 1355 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1356 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1357 } else if (RetOpcode == PPC::TCRETURNai) { 1358 MBBI = MBB.getLastNonDebugInstr(); 1359 MachineOperand &JumpTarget = MBBI->getOperand(0); 1360 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1361 } else if (RetOpcode == PPC::TCRETURNdi8) { 1362 MBBI = MBB.getLastNonDebugInstr(); 1363 MachineOperand &JumpTarget = MBBI->getOperand(0); 1364 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1365 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1366 } else if (RetOpcode == PPC::TCRETURNri8) { 1367 MBBI = MBB.getLastNonDebugInstr(); 1368 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1369 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1370 } else if (RetOpcode == PPC::TCRETURNai8) { 1371 MBBI = MBB.getLastNonDebugInstr(); 1372 MachineOperand &JumpTarget = MBBI->getOperand(0); 1373 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1374 } 1375} 1376 1377void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1378 BitVector &SavedRegs, 1379 RegScavenger *RS) const { 1380 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1381 1382 const PPCRegisterInfo *RegInfo = 1383 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 1384 1385 // Save and clear the LR state. 1386 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1387 unsigned LR = RegInfo->getRARegister(); 1388 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1389 SavedRegs.reset(LR); 1390 1391 // Save R31 if necessary 1392 int FPSI = FI->getFramePointerSaveIndex(); 1393 bool isPPC64 = Subtarget.isPPC64(); 1394 bool isDarwinABI = Subtarget.isDarwinABI(); 1395 MachineFrameInfo *MFI = MF.getFrameInfo(); 1396 1397 // If the frame pointer save index hasn't been defined yet. 1398 if (!FPSI && needsFP(MF)) { 1399 // Find out what the fix offset of the frame pointer save area. 1400 int FPOffset = getFramePointerSaveOffset(); 1401 // Allocate the frame index for frame pointer save area. 1402 FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1403 // Save the result. 1404 FI->setFramePointerSaveIndex(FPSI); 1405 } 1406 1407 int BPSI = FI->getBasePointerSaveIndex(); 1408 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1409 int BPOffset = getBasePointerSaveOffset(); 1410 // Allocate the frame index for the base pointer save area. 1411 BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1412 // Save the result. 1413 FI->setBasePointerSaveIndex(BPSI); 1414 } 1415 1416 // Reserve stack space for the PIC Base register (R30). 1417 // Only used in SVR4 32-bit. 1418 if (FI->usesPICBase()) { 1419 int PBPSI = MFI->CreateFixedObject(4, -8, true); 1420 FI->setPICBasePointerSaveIndex(PBPSI); 1421 } 1422 1423 // Reserve stack space to move the linkage area to in case of a tail call. 1424 int TCSPDelta = 0; 1425 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1426 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1427 MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1428 } 1429 1430 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the 1431 // function uses CR 2, 3, or 4. 1432 if (!isPPC64 && !isDarwinABI && 1433 (SavedRegs.test(PPC::CR2) || 1434 SavedRegs.test(PPC::CR3) || 1435 SavedRegs.test(PPC::CR4))) { 1436 int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); 1437 FI->setCRSpillFrameIndex(FrameIdx); 1438 } 1439} 1440 1441void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1442 RegScavenger *RS) const { 1443 // Early exit if not using the SVR4 ABI. 1444 if (!Subtarget.isSVR4ABI()) { 1445 addScavengingSpillSlot(MF, RS); 1446 return; 1447 } 1448 1449 // Get callee saved register information. 1450 MachineFrameInfo *FFI = MF.getFrameInfo(); 1451 const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); 1452 1453 // If the function is shrink-wrapped, and if the function has a tail call, the 1454 // tail call might not be in the new RestoreBlock, so real branch instruction 1455 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1456 // RestoreBlock. So we handle this case here. 1457 if (FFI->getSavePoint() && FFI->hasTailCall()) { 1458 MachineBasicBlock *RestoreBlock = FFI->getRestorePoint(); 1459 for (MachineBasicBlock &MBB : MF) { 1460 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1461 createTailCallBranchInstr(MBB); 1462 } 1463 } 1464 1465 // Early exit if no callee saved registers are modified! 1466 if (CSI.empty() && !needsFP(MF)) { 1467 addScavengingSpillSlot(MF, RS); 1468 return; 1469 } 1470 1471 unsigned MinGPR = PPC::R31; 1472 unsigned MinG8R = PPC::X31; 1473 unsigned MinFPR = PPC::F31; 1474 unsigned MinVR = PPC::V31; 1475 1476 bool HasGPSaveArea = false; 1477 bool HasG8SaveArea = false; 1478 bool HasFPSaveArea = false; 1479 bool HasVRSAVESaveArea = false; 1480 bool HasVRSaveArea = false; 1481 1482 SmallVector<CalleeSavedInfo, 18> GPRegs; 1483 SmallVector<CalleeSavedInfo, 18> G8Regs; 1484 SmallVector<CalleeSavedInfo, 18> FPRegs; 1485 SmallVector<CalleeSavedInfo, 18> VRegs; 1486 1487 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1488 unsigned Reg = CSI[i].getReg(); 1489 if (PPC::GPRCRegClass.contains(Reg)) { 1490 HasGPSaveArea = true; 1491 1492 GPRegs.push_back(CSI[i]); 1493 1494 if (Reg < MinGPR) { 1495 MinGPR = Reg; 1496 } 1497 } else if (PPC::G8RCRegClass.contains(Reg)) { 1498 HasG8SaveArea = true; 1499 1500 G8Regs.push_back(CSI[i]); 1501 1502 if (Reg < MinG8R) { 1503 MinG8R = Reg; 1504 } 1505 } else if (PPC::F8RCRegClass.contains(Reg)) { 1506 HasFPSaveArea = true; 1507 1508 FPRegs.push_back(CSI[i]); 1509 1510 if (Reg < MinFPR) { 1511 MinFPR = Reg; 1512 } 1513 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1514 PPC::CRRCRegClass.contains(Reg)) { 1515 ; // do nothing, as we already know whether CRs are spilled 1516 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1517 HasVRSAVESaveArea = true; 1518 } else if (PPC::VRRCRegClass.contains(Reg)) { 1519 HasVRSaveArea = true; 1520 1521 VRegs.push_back(CSI[i]); 1522 1523 if (Reg < MinVR) { 1524 MinVR = Reg; 1525 } 1526 } else { 1527 llvm_unreachable("Unknown RegisterClass!"); 1528 } 1529 } 1530 1531 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1532 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1533 1534 int64_t LowerBound = 0; 1535 1536 // Take into account stack space reserved for tail calls. 1537 int TCSPDelta = 0; 1538 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1539 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1540 LowerBound = TCSPDelta; 1541 } 1542 1543 // The Floating-point register save area is right below the back chain word 1544 // of the previous stack frame. 1545 if (HasFPSaveArea) { 1546 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1547 int FI = FPRegs[i].getFrameIdx(); 1548 1549 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1550 } 1551 1552 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1553 } 1554 1555 // Check whether the frame pointer register is allocated. If so, make sure it 1556 // is spilled to the correct offset. 1557 if (needsFP(MF)) { 1558 HasGPSaveArea = true; 1559 1560 int FI = PFI->getFramePointerSaveIndex(); 1561 assert(FI && "No Frame Pointer Save Slot!"); 1562 1563 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1564 } 1565 1566 if (PFI->usesPICBase()) { 1567 HasGPSaveArea = true; 1568 1569 int FI = PFI->getPICBasePointerSaveIndex(); 1570 assert(FI && "No PIC Base Pointer Save Slot!"); 1571 1572 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1573 } 1574 1575 const PPCRegisterInfo *RegInfo = 1576 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 1577 if (RegInfo->hasBasePointer(MF)) { 1578 HasGPSaveArea = true; 1579 1580 int FI = PFI->getBasePointerSaveIndex(); 1581 assert(FI && "No Base Pointer Save Slot!"); 1582 1583 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1584 } 1585 1586 // General register save area starts right below the Floating-point 1587 // register save area. 1588 if (HasGPSaveArea || HasG8SaveArea) { 1589 // Move general register save area spill slots down, taking into account 1590 // the size of the Floating-point register save area. 1591 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1592 int FI = GPRegs[i].getFrameIdx(); 1593 1594 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1595 } 1596 1597 // Move general register save area spill slots down, taking into account 1598 // the size of the Floating-point register save area. 1599 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1600 int FI = G8Regs[i].getFrameIdx(); 1601 1602 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1603 } 1604 1605 unsigned MinReg = 1606 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1607 TRI->getEncodingValue(MinG8R)); 1608 1609 if (Subtarget.isPPC64()) { 1610 LowerBound -= (31 - MinReg + 1) * 8; 1611 } else { 1612 LowerBound -= (31 - MinReg + 1) * 4; 1613 } 1614 } 1615 1616 // For 32-bit only, the CR save area is below the general register 1617 // save area. For 64-bit SVR4, the CR save area is addressed relative 1618 // to the stack pointer and hence does not need an adjustment here. 1619 // Only CR2 (the first nonvolatile spilled) has an associated frame 1620 // index so that we have a single uniform save area. 1621 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { 1622 // Adjust the frame index of the CR spill slot. 1623 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1624 unsigned Reg = CSI[i].getReg(); 1625 1626 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) 1627 // Leave Darwin logic as-is. 1628 || (!Subtarget.isSVR4ABI() && 1629 (PPC::CRBITRCRegClass.contains(Reg) || 1630 PPC::CRRCRegClass.contains(Reg)))) { 1631 int FI = CSI[i].getFrameIdx(); 1632 1633 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1634 } 1635 } 1636 1637 LowerBound -= 4; // The CR save area is always 4 bytes long. 1638 } 1639 1640 if (HasVRSAVESaveArea) { 1641 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 1642 // which have the VRSAVE register class? 1643 // Adjust the frame index of the VRSAVE spill slot. 1644 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1645 unsigned Reg = CSI[i].getReg(); 1646 1647 if (PPC::VRSAVERCRegClass.contains(Reg)) { 1648 int FI = CSI[i].getFrameIdx(); 1649 1650 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1651 } 1652 } 1653 1654 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 1655 } 1656 1657 if (HasVRSaveArea) { 1658 // Insert alignment padding, we need 16-byte alignment. 1659 LowerBound = (LowerBound - 15) & ~(15); 1660 1661 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 1662 int FI = VRegs[i].getFrameIdx(); 1663 1664 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1665 } 1666 } 1667 1668 addScavengingSpillSlot(MF, RS); 1669} 1670 1671void 1672PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 1673 RegScavenger *RS) const { 1674 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 1675 // a large stack, which will require scavenging a register to materialize a 1676 // large offset. 1677 1678 // We need to have a scavenger spill slot for spills if the frame size is 1679 // large. In case there is no free register for large-offset addressing, 1680 // this slot is used for the necessary emergency spill. Also, we need the 1681 // slot for dynamic stack allocations. 1682 1683 // The scavenger might be invoked if the frame offset does not fit into 1684 // the 16-bit immediate. We don't know the complete frame size here 1685 // because we've not yet computed callee-saved register spills or the 1686 // needed alignment padding. 1687 unsigned StackSize = determineFrameLayout(MF, false, true); 1688 MachineFrameInfo *MFI = MF.getFrameInfo(); 1689 if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 1690 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 1691 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; 1692 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; 1693 const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; 1694 RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), 1695 RC->getAlignment(), 1696 false)); 1697 1698 // Might we have over-aligned allocas? 1699 bool HasAlVars = MFI->hasVarSizedObjects() && 1700 MFI->getMaxAlignment() > getStackAlignment(); 1701 1702 // These kinds of spills might need two registers. 1703 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 1704 RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), 1705 RC->getAlignment(), 1706 false)); 1707 1708 } 1709} 1710 1711bool 1712PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 1713 MachineBasicBlock::iterator MI, 1714 const std::vector<CalleeSavedInfo> &CSI, 1715 const TargetRegisterInfo *TRI) const { 1716 1717 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 1718 // Return false otherwise to maintain pre-existing behavior. 1719 if (!Subtarget.isSVR4ABI()) 1720 return false; 1721 1722 MachineFunction *MF = MBB.getParent(); 1723 const PPCInstrInfo &TII = 1724 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1725 DebugLoc DL; 1726 bool CRSpilled = false; 1727 MachineInstrBuilder CRMIB; 1728 1729 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1730 unsigned Reg = CSI[i].getReg(); 1731 // Only Darwin actually uses the VRSAVE register, but it can still appear 1732 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 1733 // Darwin, ignore it. 1734 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 1735 continue; 1736 1737 // CR2 through CR4 are the nonvolatile CR fields. 1738 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 1739 1740 // Add the callee-saved register as live-in; it's killed at the spill. 1741 MBB.addLiveIn(Reg); 1742 1743 if (CRSpilled && IsCRField) { 1744 CRMIB.addReg(Reg, RegState::ImplicitKill); 1745 continue; 1746 } 1747 1748 // Insert the spill to the stack frame. 1749 if (IsCRField) { 1750 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 1751 if (Subtarget.isPPC64()) { 1752 // The actual spill will happen at the start of the prologue. 1753 FuncInfo->addMustSaveCR(Reg); 1754 } else { 1755 CRSpilled = true; 1756 FuncInfo->setSpillsCR(); 1757 1758 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 1759 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 1760 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 1761 .addReg(Reg, RegState::ImplicitKill); 1762 1763 MBB.insert(MI, CRMIB); 1764 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 1765 .addReg(PPC::R12, 1766 getKillRegState(true)), 1767 CSI[i].getFrameIdx())); 1768 } 1769 } else { 1770 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1771 TII.storeRegToStackSlot(MBB, MI, Reg, true, 1772 CSI[i].getFrameIdx(), RC, TRI); 1773 } 1774 } 1775 return true; 1776} 1777 1778static void 1779restoreCRs(bool isPPC64, bool is31, 1780 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, 1781 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1782 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { 1783 1784 MachineFunction *MF = MBB.getParent(); 1785 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 1786 DebugLoc DL; 1787 unsigned RestoreOp, MoveReg; 1788 1789 if (isPPC64) 1790 // This is handled during epilogue generation. 1791 return; 1792 else { 1793 // 32-bit: FP-relative 1794 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), 1795 PPC::R12), 1796 CSI[CSIIndex].getFrameIdx())); 1797 RestoreOp = PPC::MTOCRF; 1798 MoveReg = PPC::R12; 1799 } 1800 1801 if (CR2Spilled) 1802 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 1803 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 1804 1805 if (CR3Spilled) 1806 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 1807 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 1808 1809 if (CR4Spilled) 1810 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 1811 .addReg(MoveReg, getKillRegState(true))); 1812} 1813 1814MachineBasicBlock::iterator PPCFrameLowering:: 1815eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 1816 MachineBasicBlock::iterator I) const { 1817 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 1818 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1819 I->getOpcode() == PPC::ADJCALLSTACKUP) { 1820 // Add (actually subtract) back the amount the callee popped on return. 1821 if (int CalleeAmt = I->getOperand(1).getImm()) { 1822 bool is64Bit = Subtarget.isPPC64(); 1823 CalleeAmt *= -1; 1824 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 1825 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 1826 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 1827 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 1828 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 1829 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 1830 MachineInstr *MI = I; 1831 const DebugLoc &dl = MI->getDebugLoc(); 1832 1833 if (isInt<16>(CalleeAmt)) { 1834 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 1835 .addReg(StackReg, RegState::Kill) 1836 .addImm(CalleeAmt); 1837 } else { 1838 MachineBasicBlock::iterator MBBI = I; 1839 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 1840 .addImm(CalleeAmt >> 16); 1841 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 1842 .addReg(TmpReg, RegState::Kill) 1843 .addImm(CalleeAmt & 0xFFFF); 1844 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 1845 .addReg(StackReg, RegState::Kill) 1846 .addReg(TmpReg); 1847 } 1848 } 1849 } 1850 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 1851 return MBB.erase(I); 1852} 1853 1854bool 1855PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 1856 MachineBasicBlock::iterator MI, 1857 const std::vector<CalleeSavedInfo> &CSI, 1858 const TargetRegisterInfo *TRI) const { 1859 1860 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 1861 // Return false otherwise to maintain pre-existing behavior. 1862 if (!Subtarget.isSVR4ABI()) 1863 return false; 1864 1865 MachineFunction *MF = MBB.getParent(); 1866 const PPCInstrInfo &TII = 1867 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1868 bool CR2Spilled = false; 1869 bool CR3Spilled = false; 1870 bool CR4Spilled = false; 1871 unsigned CSIIndex = 0; 1872 1873 // Initialize insertion-point logic; we will be restoring in reverse 1874 // order of spill. 1875 MachineBasicBlock::iterator I = MI, BeforeI = I; 1876 bool AtStart = I == MBB.begin(); 1877 1878 if (!AtStart) 1879 --BeforeI; 1880 1881 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1882 unsigned Reg = CSI[i].getReg(); 1883 1884 // Only Darwin actually uses the VRSAVE register, but it can still appear 1885 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 1886 // Darwin, ignore it. 1887 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 1888 continue; 1889 1890 if (Reg == PPC::CR2) { 1891 CR2Spilled = true; 1892 // The spill slot is associated only with CR2, which is the 1893 // first nonvolatile spilled. Save it here. 1894 CSIIndex = i; 1895 continue; 1896 } else if (Reg == PPC::CR3) { 1897 CR3Spilled = true; 1898 continue; 1899 } else if (Reg == PPC::CR4) { 1900 CR4Spilled = true; 1901 continue; 1902 } else { 1903 // When we first encounter a non-CR register after seeing at 1904 // least one CR register, restore all spilled CRs together. 1905 if ((CR2Spilled || CR3Spilled || CR4Spilled) 1906 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1907 bool is31 = needsFP(*MF); 1908 restoreCRs(Subtarget.isPPC64(), is31, 1909 CR2Spilled, CR3Spilled, CR4Spilled, 1910 MBB, I, CSI, CSIIndex); 1911 CR2Spilled = CR3Spilled = CR4Spilled = false; 1912 } 1913 1914 // Default behavior for non-CR saves. 1915 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1916 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), 1917 RC, TRI); 1918 assert(I != MBB.begin() && 1919 "loadRegFromStackSlot didn't insert any code!"); 1920 } 1921 1922 // Insert in reverse order. 1923 if (AtStart) 1924 I = MBB.begin(); 1925 else { 1926 I = BeforeI; 1927 ++I; 1928 } 1929 } 1930 1931 // If we haven't yet spilled the CRs, do so now. 1932 if (CR2Spilled || CR3Spilled || CR4Spilled) { 1933 bool is31 = needsFP(*MF); 1934 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, 1935 MBB, I, CSI, CSIIndex); 1936 } 1937 1938 return true; 1939} 1940 1941bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 1942 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 1943 MF.getSubtarget<PPCSubtarget>().isPPC64()); 1944} 1945