//===-- X86FrameLowering.cpp - X86 Frame Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  return hasReservedCallFrame(MF) ||
         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function? Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  return MF.getFrameInfo()->hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          RegInfo->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit() || MMI.callsEHReturn() ||
          MFI->hasStackMap() || MFI->hasPatchPoint());
}

static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::AND64ri8;
    return X86::AND64ri32;
  }
  if (isInt<8>(Imm))
    return X86::AND32ri8;
  return X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit
                                 : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}

static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
                  const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc;
  if (UseLEA)
    Opc = getLEArOpcode(Is64BitStackPtr);
  else
    Opc = isSub
      ? getSUBriOpcode(Is64BitStackPtr, Offset)
      : getADDriOpcode(Is64BitStackPtr, Offset);

  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    if (Offset > Chunk) {
      // Rather than emit a long series of instructions for large offsets,
      // load the offset into a register and do one sub/add.
      unsigned Reg = 0;

      if (isSub && !isEAXLiveIn(*MBB.getParent()))
        Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
      else
        Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);

      if (Reg) {
        Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
        BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
          .addImm(Offset);
        Opc = isSub
          ? getSUBrrOpcode(Is64BitTarget)
          : getADDrrOpcode(Is64BitTarget);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
          .addReg(StackPtr)
          .addReg(Reg);
        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
        Offset = 0;
        continue;
      }
    }

    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == (Is64BitTarget ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
      if (Reg) {
        Opc = isSub
          ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
          : (Is64BitTarget ? X86::POP64r : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI = nullptr;

    if (UseLEA) {
      MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                        StackPtr, false, isSub ? -ThisVal : ThisVal);
    } else {
      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr)
        .addImm(ThisVal);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    }

    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);

    Offset -= ThisVal;
  }
}
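// For example (illustrative), assuming RAX is dead at the update point, a
// 6 GiB downward adjustment on x86-64 is emitted as
//   movabsq $0x180000000, %rax
//   subq    %rax, %rsp
// rather than a long chain of (2GiB-1)-byte SUBs, while an adjustment of
// exactly 8 bytes degenerates into a single push of a dead register.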
/// mergeSPUpdatesUp - Merge an SP adjustment (ADD/SUB/LEA of the stack
/// pointer) found immediately above the given iterator into *NumBytes and
/// erase the merged instruction.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = std::prev(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB/LEA of the stack pointer, it is deleted
/// and the stack adjustment is returned as a positive value for ADD/LEA and
/// a negative one for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                       : std::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex =
        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
                                                        Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}
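// Usage sketch for mergeSPUpdates above (illustrative): given the stream
//   subq $16, %rsp
//   <MBBI>
// mergeSPUpdates(MBB, MBBI, StackPtr, /*doMergeWithPrevious=*/true) erases
// the SUB and returns -16, which callers such as emitPrologue fold into
// their own pending adjustment (NumBytes -= mergeSPUpdates(...)).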
/// usesTheStack - This function checks if any of the users of EFLAGS
/// copy the EFLAGS. We know that the code that lowers COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::reg_instr_iterator
       ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
       ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}

void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          DebugLoc DL) {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  bool Is64Bit = STI.is64Bit();
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  const char *Symbol;
  if (Is64Bit) {
    if (STI.isTargetCygMing()) {
      Symbol = "___chkstk_ms";
    } else {
      Symbol = "__chkstk";
    }
  } else if (STI.isTargetCygMing())
    Symbol = "_alloca";
  else
    Symbol = "_chkstk";

  MachineInstrBuilder CI;

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(Symbol);
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
  }

  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  if (Is64Bit) {
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
        .addReg(X86::RSP)
        .addReg(X86::RAX);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
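// Worked examples for calculateSetFPREG (illustrative): SPAdjust = 0x58 gives
// min(0x58, 128) = 0x58, masked down to 0x50; SPAdjust = 0x1D0 clamps to 128,
// which is already 16-byte aligned, so the frame pointer is established 128
// bytes above the stack pointer.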
// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }
  return MaxAlign;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
        .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov  $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();       // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
  // Not necessarily synonymous with IsWin64.
  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  bool UseLEA = STI.useLeaForSP();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  const unsigned MachineFramePtr =
      STI.isTarget64BitILP32()
          ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
          : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();
  unsigned BasePtr = RegInfo->getBaseRegister();
  DebugLoc DL;

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta && IsWinEH)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
        X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());

  // The default stack probe size is 4096 if the function has no stackprobesize
  // attribute.
  unsigned StackProbeSize = 4096;
  if (Fn->hasFnAttribute("stack-probe-size"))
    Fn->getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);

  // On x86-64, if the Red Zone is not disabled, the function is a leaf, uses
  // at most 128 bytes of stack space, and has no frame pointer, calls, or
  // dynamic allocas, then we do not need to adjust the stack pointer (we fit
  // in the Red Zone). We also check that we don't push and pop from the stack.
  if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() && // No dynamic alloca.
      !MFI->adjustsStack() &&       // No calls.
      !IsWin64 &&                   // Win64 has no Red Zone.
      !usesTheStack(MF) &&          // Don't push and pop.
      !MF.shouldSplitStack()) {     // Regular stack.
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }
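  // Illustrative example of the Red Zone rule above: a 64-bit leaf with
  // StackSize = 136, no FP and no callee saves shrinks to
  // max(0, 136 - 128) = 8, so only the bytes that do not fit in the
  // 128-byte Red Zone are actually allocated.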
  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(getSUBriOpcode(Uses64BitFramePtr,
                                       -TailCallReturnAddrDelta)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(-TailCallReturnAddrDelta)
            .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for an extra hidden slot for stashing the
    // base pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Callee-saved registers are pushed on stack before the stack is realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
        .addReg(MachineFramePtr, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr,
                                         DwarfFramePtr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (!IsWinEH) {
      // Update EBP with the new base value.
      BuildMI(MBB, MBBI, DL,
              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
              FramePtr)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (NeedsDwarfCFI) {
      // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
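    // Taken together, the framed x86-64 case above emits (matching the gist
    // before emitPrologue):
    //   .cfi_def_cfa_offset 16
    //   .cfi_offset %rbp, -16
    //   .cfi_def_cfa_register %rbp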
    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(MachineFramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      StackOffset += stackGrowth;
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Don't do this for Win64; it needs to realign the stack after the prologue.
  if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  uint64_t AlignedNumBytes = NumBytes;
  if (IsWinEH && RegInfo->needsStackRealignment(MF))
    AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
    // Check whether EAX is live-in for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // We only know how to preserve a live EAX for 32-bit targets; it must
      // never be live-in here on x86-64.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
          .addReg(X86::EAX, RegState::Kill)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
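      // Pick the cheapest move that materializes NumBytes in (R)AX for the
      // probe: a 32-bit immediate written to EAX is implicitly zero-extended
      // into RAX, MOV64ri32 sign-extends its immediate, and only genuinely
      // large values need the full 10-byte MOV64ri encoding.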
      if (isUInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else if (isInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      // Allocate NumBytes-4 bytes on stack in case EAX is alive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
          .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // Save a pointer to the MI where we set AX.
    MachineBasicBlock::iterator SetRAX = MBBI;
    --SetRAX;

    // Call the stack probe routine (__chkstk, ___chkstk_ms, or _alloca).
    emitStackProbeCall(MF, MBB, MBBI, DL);

    // Apply the frame setup flag to all inserted instrs.
    for (; SetRAX != MBBI; ++SetRAX)
      SetRAX->setFlag(MachineInstr::FrameSetup);

    if (isEAXAlive) {
      // Restore EAX.
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 Uses64BitFramePtr, UseLEA, TII, *RegInfo);
  }

  if (NeedsWinEH && NumBytes)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);

  int SEHFrameOffset = 0;
  if (IsWinEH && HasFP) {
    SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   StackPtr, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);

    if (NeedsWinEH)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
  }

  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr *FrameInstr = &*MBBI;
    ++MBBI;

    if (NeedsWinEH) {
      int FI;
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          int Offset = getFrameIndexOffset(MF, FI);
          Offset += SEHFrameOffset;

          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
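  // Illustrative: with MaxAlign = 32 the realignment above is a single
  //   andq $-32, %rsp
  // (AND64ri8, since -32 fits in a sign-extended 8-bit immediate).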
  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash the value of the base pointer. Saving RSP instead of EBP
      // shortens the dependence chain.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr,
                                               -StackSize + stackGrowth));

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool HasFP = hasFP(MF);
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned MachineFramePtr =
      Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
                   : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();

  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
  bool UseLEAForSP = false;

  // We can't use LEA instructions for adjusting the stack pointer if this is a
  // leaf function in the Win64 ABI.  Only ADD instructions may be used to
  // deallocate the stack.
  if (STI.useLeaForSP()) {
    if (!IsWinEH) {
      // We *aren't* using the Win64 ABI which means we are free to use LEA.
      UseLEAForSP = true;
    } else if (HasFP) {
      // We *have* a frame pointer which means we are permitted to use LEA.
      UseLEAForSP = true;
    }
  }

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilogue into returning blocks");
  case X86::RETQ:
  case X86::RETL:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break; // These are ok.
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! The same applies when the stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    uint64_t LEAAmount = IsWinEH ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, LEAAmount);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
          .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
    --MBBI;
  }

  // The Windows unwinder will not invoke a function's exception handler if the
  // IP is either in the prologue or in the epilogue. This behavior causes a
  // problem when a call immediately precedes an epilogue, because the return
  // address points into the epilogue. To cope with that, we insert an epilogue
  // marker here, then replace it with a 'nop' if it ends up immediately after
  // a CALL in the final emitted code.
  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // We're returning from the function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
                   UseLEAForSP, TII, *RegInfo);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI.isTargetWin64();
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      unsigned Op = (RetOpcode == X86::TCRETURNdi)
                        ? X86::TAILJMPd
                        : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      unsigned Op = (RetOpcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL,
              TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(MF, MBBI);
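    // The TCRETURN pseudo carries implicit operands (e.g. argument registers
    // that must stay live across the tail call); copy them onto the real
    // TAILJMP so register liveness remains correct.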
    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
              RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1 * X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
  }
}

int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Offset will hold the offset from the stack pointer at function entry to the
  // object.
  // We need to factor in additional offsets applied during the prologue to the
  // frame, base, and stack pointer depending on which is used.
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t StackSize = MFI->getStackSize();
  unsigned SlotSize = RegInfo->getSlotSize();
  bool HasFP = hasFP(MF);
  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  int64_t FPDelta = 0;

  if (IsWinEH) {
    assert(!MFI->hasCalls() || (StackSize % 16) == 8);

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for an extra hidden slot for stashing the
    // base pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;
    uint64_t NumBytes = FrameSize - CSSize;

    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (FI && FI == X86FI->getFAIndex())
      return -SEHFrameOffset;

    // FPDelta is the offset between the "traditional" FP location (the old
    // base pointer followed by the return address) and the location required
    // by the restricted Win64 prologue.
    // Add FPDelta to all offsets below that go through the frame pointer.
    FPDelta = FrameSize - SEHFrameOffset;
    assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
           "FPDelta isn't aligned per the Win64 ABI!");
  }

  if (RegInfo->hasBasePointer(MF)) {
    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (RegInfo->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls.
  } else {
    if (!HasFP)
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area.
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset + FPDelta;
}

int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer. The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else if (RegInfo->needsStackRealignment(MF))
    FrameReg = RegInfo->getStackRegister();
  else
    FrameReg = RegInfo->getFrameRegister(MF);
  return getFrameIndexOffset(MF, FI);
}

// Simplified from getFrameIndexOffset keeping only StackPointer cases.
int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF,
                                                int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
  const uint64_t StackSize = MFI->getStackSize();
  {
#ifndef NDEBUG
    const X86RegisterInfo *RegInfo =
        MF.getSubtarget<X86Subtarget>().getRegisterInfo();
    // Note: LLVM arranges the stack as:
    // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
    //      > "Stack Slots" (<--SP)
    // We can always address StackSlots from RSP.  We can usually (unless
    // needsStackRealignment) address CSRs from RSP, but sometimes need to
    // address them from RBP.  FixedObjects can be placed anywhere in the stack
    // frame depending on their specific requirements (i.e. we can actually
    // refer to arguments to the function which are stored in the *callers*
    // frame).  As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
    // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.

    assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

    // We don't handle tail calls, and shouldn't be seeing them
    // either.
    int TailCallReturnAddrDelta =
        MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
    assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
#endif
  }

  // This is how the math works out:
  //
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
  //  one word (eight bytes). Obj0 is the stack slot we're trying to
  //  get to.
  //
  //    ----------------------------------
  //    | BP | Obj0 | Obj1 | ... | ObjN |
  //    ----------------------------------
  //    ^    ^      ^               ^
  //    A    B      C               E
  //
  // A is the incoming stack pointer.
  // (B - A) is the local area offset (-8 for x86-64) [1]
  // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
  //
  // |(E - B)| is the StackSize (absolute value, positive). For a
  // stack that grows down, this works out to be (B - E). [3]
  //
  // E is also the value of %rsp after stack has been set up, and we
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
  // (C - E) == (C - A) - (B - A) + (B - E)
  //        { Using [1], [2] and [3] above }
  //      == getObjectOffset - LocalAreaOffset + StackSize
  //

  // Get the Offset from the StackPointer.
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();

  return Offset + StackSize;
}
// Simplified from getFrameIndexReference keeping only StackPointer cases.
int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
                                                   int FI,
                                                   unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

  FrameReg = RegInfo->getStackRegister();
  return getFrameIndexOffsetFromSP(MF, FI);
}

bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();

  if (hasFP(MF)) {
    // emitPrologue always spills the frame register first thing.
    SpillSlotOffset -= SlotSize;
    MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
    // the frame register, we can delete it from the CSI list and not have to
    // worry about avoiding it later.
    unsigned FPReg = RegInfo->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }

  // Assign slots for GPRs. It increases frame size.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
  }

  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);

  // Assign slots for XMMs.
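  // Illustrative arithmetic for the loop below: starting from
  // SpillSlotOffset = -40 with a 16-byte-aligned 16-byte XMM slot, the
  // alignment step gives -40 - (abs(-40) % 16) = -48 and the size step gives
  // -48 - 16 = -64, so the register is spilled to the fixed object covering
  // [-64, -48).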
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
    // Ensure alignment.
    SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
    // Spill into slot.
    SpillSlotOffset -= RC->getSize();
    int SlotIndex =
        MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
    MFI->ensureMaxAlignment(RC->getAlignment());
  }

  return true;
}

bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();

  // Push GPRs. It increases frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);

    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Spill XMM regs. X86 has no push/pop instructions for XMM registers, so
  // spill them to the stack frame instead.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                            TRI);
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}

bool X86FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
  }

  // POP GPRs.
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;

    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
  }
  return true;
}

void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // Create the RETURNADDR area:
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}

static bool
HasNestArgument(const MachineFunction *MF) {
  const Function *F = MF->getFunction();
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    if (I->hasNestAttr())
      return true;
  }
  return false;
}

/// GetScratchRegister - Get a temp register for performing work in the
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF,
                   bool Primary) {
  CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();

  // Erlang stuff.
  if (CallingConvention == CallingConv::HiPE) {
    if (Is64Bit)
      return Primary ? X86::R14 : X86::R13;
    else
      return Primary ? X86::EBX : X86::EDI;
  }

  if (Is64Bit) {
    if (IsLP64)
      return Primary ? X86::R11 : X86::R12;
    else
      return Primary ? X86::R11D : X86::R12D;
  }

  bool IsNested = HasNestArgument(&MF);

  if (CallingConvention == CallingConv::X86_FastCall ||
      CallingConvention == CallingConv::Fast) {
    if (IsNested)
      report_fatal_error("Segmented stacks do not support fastcall with "
                         "nested functions.");
    return Primary ? X86::EAX : X86::ECX;
  }
  if (IsNested)
    return Primary ? X86::EDX : X86::EAX;
  return Primary ? X86::ECX : X86::EAX;
}
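// For example (illustrative): on x86-64/LP64 with the C calling convention,
// GetScratchRegister(true, true, MF, /*Primary=*/true) returns X86::R11 and
// the non-primary query returns X86::R12; adjustForSegmentedStacks below uses
// the primary register for its stack-limit comparison.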
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
  MachineBasicBlock &prologueMBB = MF.front();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  uint64_t StackSize;
  bool Is64Bit = STI.is64Bit();
  const bool IsLP64 = STI.isTarget64BitLP64();
  unsigned TlsReg, TlsOffset;
  DebugLoc DL;

  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
         "Scratch register is live-in");

  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
      !STI.isTargetDragonFly())
    report_fatal_error("Segmented stacks not supported on this platform.");

  // Eventually StackSize will be calculated by a link-time pass, which will
  // also decide whether checking code needs to be injected into this
  // particular prologue.
  StackSize = MFI->getStackSize();

  // Do not generate a prologue for functions with a stack of size zero.
  if (StackSize == 0)
    return;

  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool IsNested = false;

  // We need to know if the function has a nest argument only in 64 bit mode.
  if (Is64Bit)
    IsNested = HasNestArgument(&MF);

  // The MOV R10, RAX needs to be in a different block, since the RET we emit
  // in allocMBB needs to be the last (terminating) instruction.

  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
         e = prologueMBB.livein_end(); i != e; ++i) {
    allocMBB->addLiveIn(*i);
    checkMBB->addLiveIn(*i);
  }

  if (IsNested)
    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);

  MF.push_front(allocMBB);
  MF.push_front(checkMBB);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = StackSize < kSplitStackAvailable;

  // Read the limit of the current stacklet off the stack_guard location.
  if (Is64Bit) {
    if (STI.isTargetLinux()) {
      TlsReg = X86::FS;
      TlsOffset = IsLP64 ? 0x70 : 0x40;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
    } else if (STI.isTargetWin64()) {
      TlsReg = X86::GS;
      TlsOffset = 0x28; // pvArbitrary, reserved for application use
    } else if (STI.isTargetFreeBSD()) {
      TlsReg = X86::FS;
      TlsOffset = 0x18;
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }
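    // Materialize "SP - StackSize" in the scratch register (for small frames,
    // compare SP itself) and check it against the stacklet limit read from
    // the TLS slot chosen above.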
    if (CompareStackPointer)
      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
              ScratchReg).addReg(X86::RSP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
      .addReg(ScratchReg)
      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  } else {
    if (STI.isTargetLinux()) {
      TlsReg = X86::GS;
      TlsOffset = 0x30;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x48 + 90*4;
    } else if (STI.isTargetWin32()) {
      TlsReg = X86::FS;
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
    } else if (STI.isTargetFreeBSD()) {
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
        STI.isTargetDragonFly()) {
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
    } else if (STI.isTargetDarwin()) {
      // TlsOffset doesn't fit into a mod r/m byte so we need an extra
      // register.
      unsigned ScratchReg2;
      bool SaveScratch2;
      if (CompareStackPointer) {
        // The primary scratch register is available for holding the TLS
        // offset.
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
        SaveScratch2 = false;
      } else {
        // Need to use a second register to hold the TLS offset.
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);

        // Unfortunately, with fastcc the second scratch register may hold an
        // argument.
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
      }

      // If Scratch2 is live-in then it needs to be saved.
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
             "Scratch register is live-in and not saved");

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
          .addReg(ScratchReg2, RegState::Kill);

      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
        .addImm(TlsOffset);
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
        .addReg(ScratchReg)
        .addReg(ScratchReg2).addImm(1).addReg(0)
        .addImm(0)
        .addReg(TlsReg);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
    }
  }

  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  // It jumps to normal execution of the function body.
  BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);

  // On 32 bit we first push the arguments size and then the frame size. On
  // 64 bit, we pass the stack frame size in r10 and the argument size in r11.
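  // For instance, on a 32-bit target the allocation block below ends up
  // roughly as:
  //   pushl $<ArgumentStackSize>
  //   pushl $<StackSize>
  //   calll __morestack
  //   retl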
  if (Is64Bit) {
    // Functions with nested arguments use R10, so it needs to be saved across
    // the call to __morestack.

    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;

    if (IsNested)
      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);

    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
      .addImm(StackSize);
    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
      .addImm(X86FI->getArgumentStackSize());
    MF.getRegInfo().setPhysRegUsed(Reg10);
    MF.getRegInfo().setPhysRegUsed(Reg11);
  } else {
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(X86FI->getArgumentStackSize());
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(StackSize);
  }

  // __morestack is in libgcc.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // Under the large code model, we cannot assume that __morestack lives
    // within 2^31 bytes of the call site, so we cannot use pc-relative
    // addressing. We cannot perform the call via a temporary register,
    // as the rax register may be used to store the static chain, and all
    // other suitable registers may be either callee-save or used for
    // parameter passing. We cannot use the stack at this point either
    // because __morestack manipulates the stack directly.
    //
    // To avoid these issues, perform an indirect call via a read-only memory
    // location containing the address.
    //
    // This solution is not perfect, as it assumes that the .rodata section
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for the JIT.
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addExternalSymbol("__morestack_addr")
        .addReg(0);
    MF.getMMI().setUsesMorestackAddr(true);
  } else {
    if (Is64Bit)
      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
        .addExternalSymbol("__morestack");
    else
      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("__morestack");
  }

  if (IsNested)
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  else
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));

  allocMBB->addSuccessor(&prologueMBB);

  checkMBB->addSuccessor(allocMBB);
  checkMBB->addSuccessor(&prologueMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}

/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom hybrid stack/heap architecture.
/// (For more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///       ...
/// IncStack:
///       call inc_stack   # doubles the stack space
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
  const bool Is64Bit = STI.is64Bit();
  const bool IsLP64 = STI.isTarget64BitLP64();
  DebugLoc DL;
  // HiPE-specific values.
  const unsigned HipeLeafWords = 24;
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
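  // For example, on x86-64 SlotSize is 8, so every HiPE leaf function is
  // guaranteed 24 * 8 == 192 bytes of stack.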
  unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
                            MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
  unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&
         "HiPE prologue is only supported on Linux operating systems.");

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
  if (MFI->hasCalls()) {
    unsigned MoreStackForCalls = 0;

    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
         MBBI != MBBE; ++MBBI)
      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
           MI != ME; ++MI) {
        if (!MI->isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI->getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions, as
        // they are executed on another stack. Such functions have names that
        // either start with "erlang." or "bif_", or contain neither a "."
        // (as a regular <Module>.<Function>.<Arity> does) nor an "_" (as the
        // BIF "suspend_0" does).
        if (F->getName().find("erlang.") != StringRef::npos ||
            F->getName().find("bif_") != StringRef::npos ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        unsigned CalleeStkArity =
          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls = std::max(MoreStackForCalls,
                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed space, runtime
  // checks and calls to the "inc_stack_0" BIF are inserted in the assembly
  // prologue.
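  // Concretely, on x86-64 (where the HiPE scratch register is R14 and the SP
  // limit lives at offset 0x90 off the P register, RBP) the two blocks below
  // expand to roughly:
  //   stackCheckMBB:
  //     leaq  -MaxStack(%rsp), %r14
  //     cmpq  0x90(%rbp), %r14
  //     jae   <original prologue>
  //   incStackMBB:
  //     callq inc_stack_0
  //     leaq  -MaxStack(%rsp), %r14
  //     cmpq  0x90(%rbp), %r14
  //     jle   incStackMBB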
  if (MaxStack > Guaranteed) {
    MachineBasicBlock &prologueMBB = MF.front();
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
           E = prologueMBB.livein_end(); I != E; ++I) {
      stackCheckMBB->addLiveIn(*I);
      incStackMBB->addLiveIn(*I);
    }

    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
      SPLimitOffset = 0x90;
    } else {
      SPReg = X86::ESP;
      PReg = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
      SPLimitOffset = 0x4c;
    }

    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
           "HiPE prologue scratch register is live-in");

    // Create new MBB for StackCheck:
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed to by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);

    // Create new MBB for IncStack:
    BuildMI(incStackMBB, DL, TII.get(CALLop))
      .addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);

    stackCheckMBB->addSuccessor(&prologueMBB, 99);
    stackCheckMBB->addSuccessor(incStackMBB, 1);
    incStackMBB->addSuccessor(&prologueMBB, 99);
    incStackMBB->addSuccessor(incStackMBB, 1);
  }
#ifdef XDEBUG
  MF.verify();
#endif
}

void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
  unsigned StackPtr = RegInfo.getStackRegister();
  bool reserveCallFrame = hasReservedCallFrame(MF);
  int Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  bool IsLP64 = STI.isTarget64BitLP64();
  DebugLoc DL = I->getDebugLoc();
  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
  uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
  I = MBB.erase(I);

  if (!reserveCallFrame) {
    // If the stack pointer can be changed after the prologue, turn the
    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
    // adjcallstackup instruction into 'add ESP, <amt>'.
    if (Amount == 0)
      return;

    // We need to keep the stack aligned properly. To do this, we round the
    // amount of space needed for the outgoing arguments up to the next
    // alignment boundary.
    unsigned StackAlign = getStackAlignment();
    Amount = RoundUpToAlignment(Amount, StackAlign);
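    // E.g. with a 16-byte aligned stack, a 20-byte outgoing argument area is
    // rounded up to 32 bytes here.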
    MachineInstr *New = nullptr;

    // Factor out the amount that gets handled inside the sequence (pushes of
    // arguments for frame setup, callee pops for frame destroy).
    Amount -= InternalAmt;

    if (Amount) {
      if (Opcode == TII.getCallFrameSetupOpcode()) {
        New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
          .addReg(StackPtr).addImm(Amount);
      } else {
        assert(Opcode == TII.getCallFrameDestroyOpcode());

        unsigned Opc = getADDriOpcode(IsLP64, Amount);
        New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
          .addReg(StackPtr).addImm(Amount);
      }
    }

    if (New) {
      // The EFLAGS implicit def is dead.
      New->getOperand(3).setIsDead();

      // Replace the pseudo instruction with a new instruction.
      MBB.insert(I, New);
    }

    return;
  }

  if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back. We do this until we have
    // more advanced stack pointer tracking ability.
    unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
    MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
      .addReg(StackPtr).addImm(InternalAmt);

    // The EFLAGS implicit def is dead.
    New->getOperand(3).setIsDead();

    // We are not tracking the stack pointer adjustment by the callee, so make
    // sure we restore the stack pointer immediately after the call; there may
    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
    MachineBasicBlock::iterator B = MBB.begin();
    while (I != B && !std::prev(I)->isCall())
      --I;
    MBB.insert(I, New);
  }
}