X86FrameLowering.cpp revision 2763538609fd455d63c192b320c73fb5d48c3e47
//===-- X86FrameLowering.cpp - X86 Frame Information ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallSet.h"

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

/// hasReservedCallFrame - Return true if the call frame can be allocated once
/// in the prologue.  This holds unless the function uses variable sized
/// allocas, in which case the frame size is not fixed and call-frame
/// adjustments must remain around each call.
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
42bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 43 const MachineFrameInfo *MFI = MF.getFrameInfo(); 44 const MachineModuleInfo &MMI = MF.getMMI(); 45 const TargetRegisterInfo *RI = TM.getRegisterInfo(); 46 47 return (DisableFramePointerElim(MF) || 48 RI->needsStackRealignment(MF) || 49 MFI->hasVarSizedObjects() || 50 MFI->isFrameAddressTaken() || 51 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 52 MMI.callsUnwindInit()); 53} 54 55static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { 56 if (is64Bit) { 57 if (isInt<8>(Imm)) 58 return X86::SUB64ri8; 59 return X86::SUB64ri32; 60 } else { 61 if (isInt<8>(Imm)) 62 return X86::SUB32ri8; 63 return X86::SUB32ri; 64 } 65} 66 67static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { 68 if (is64Bit) { 69 if (isInt<8>(Imm)) 70 return X86::ADD64ri8; 71 return X86::ADD64ri32; 72 } else { 73 if (isInt<8>(Imm)) 74 return X86::ADD32ri8; 75 return X86::ADD32ri; 76 } 77} 78 79/// findDeadCallerSavedReg - Return a caller-saved register that isn't live 80/// when it reaches the "return" instruction. We can then pop a stack object 81/// to this register without worry about clobbering it. 
82static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, 83 MachineBasicBlock::iterator &MBBI, 84 const TargetRegisterInfo &TRI, 85 bool Is64Bit) { 86 const MachineFunction *MF = MBB.getParent(); 87 const Function *F = MF->getFunction(); 88 if (!F || MF->getMMI().callsEHReturn()) 89 return 0; 90 91 static const unsigned CallerSavedRegs32Bit[] = { 92 X86::EAX, X86::EDX, X86::ECX 93 }; 94 95 static const unsigned CallerSavedRegs64Bit[] = { 96 X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, 97 X86::R8, X86::R9, X86::R10, X86::R11 98 }; 99 100 unsigned Opc = MBBI->getOpcode(); 101 switch (Opc) { 102 default: return 0; 103 case X86::RET: 104 case X86::RETI: 105 case X86::TCRETURNdi: 106 case X86::TCRETURNri: 107 case X86::TCRETURNmi: 108 case X86::TCRETURNdi64: 109 case X86::TCRETURNri64: 110 case X86::TCRETURNmi64: 111 case X86::EH_RETURN: 112 case X86::EH_RETURN64: { 113 SmallSet<unsigned, 8> Uses; 114 for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { 115 MachineOperand &MO = MBBI->getOperand(i); 116 if (!MO.isReg() || MO.isDef()) 117 continue; 118 unsigned Reg = MO.getReg(); 119 if (!Reg) 120 continue; 121 for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI) 122 Uses.insert(*AsI); 123 } 124 125 const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; 126 for (; *CS; ++CS) 127 if (!Uses.count(*CS)) 128 return *CS; 129 } 130 } 131 132 return 0; 133} 134 135 136/// emitSPUpdate - Emit a series of instructions to increment / decrement the 137/// stack pointer by a constant value. 138static 139void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 140 unsigned StackPtr, int64_t NumBytes, 141 bool Is64Bit, const TargetInstrInfo &TII, 142 const TargetRegisterInfo &TRI) { 143 bool isSub = NumBytes < 0; 144 uint64_t Offset = isSub ? -NumBytes : NumBytes; 145 unsigned Opc = isSub ? 
146 getSUBriOpcode(Is64Bit, Offset) : 147 getADDriOpcode(Is64Bit, Offset); 148 uint64_t Chunk = (1LL << 31) - 1; 149 DebugLoc DL = MBB.findDebugLoc(MBBI); 150 151 while (Offset) { 152 uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; 153 if (ThisVal == (Is64Bit ? 8 : 4)) { 154 // Use push / pop instead. 155 unsigned Reg = isSub 156 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) 157 : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 158 if (Reg) { 159 Opc = isSub 160 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) 161 : (Is64Bit ? X86::POP64r : X86::POP32r); 162 BuildMI(MBB, MBBI, DL, TII.get(Opc)) 163 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); 164 Offset -= ThisVal; 165 continue; 166 } 167 } 168 169 MachineInstr *MI = 170 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 171 .addReg(StackPtr) 172 .addImm(ThisVal); 173 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 174 Offset -= ThisVal; 175 } 176} 177 178/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 179static 180void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 181 unsigned StackPtr, uint64_t *NumBytes = NULL) { 182 if (MBBI == MBB.begin()) return; 183 184 MachineBasicBlock::iterator PI = prior(MBBI); 185 unsigned Opc = PI->getOpcode(); 186 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 187 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 188 PI->getOperand(0).getReg() == StackPtr) { 189 if (NumBytes) 190 *NumBytes += PI->getOperand(2).getImm(); 191 MBB.erase(PI); 192 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 193 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 194 PI->getOperand(0).getReg() == StackPtr) { 195 if (NumBytes) 196 *NumBytes -= PI->getOperand(2).getImm(); 197 MBB.erase(PI); 198 } 199} 200 201/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. 
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  // NOTE(review): everything below the early return is dead code.  If it is
  // ever re-enabled, note that `MBBI = NI;` after `MBB.erase(NI)` assigns an
  // erased iterator — confirm before removing the return.
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB instruction of the stack pointer, it is
/// deleted and the stack adjustment is returned as a positive value for ADD
/// and a negative value for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI,
                           unsigned StackPtr,
                           bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  // PI is the instruction to inspect; NI is where MBBI must be repositioned
  // if PI (== MBBI itself) gets erased in the merge-with-next case.
  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr){
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

/// isEAXLiveIn - Return true if any sub- or super-register of EAX is a
/// live-in of the function.
static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// emitCalleeSavedFrameMoves - Record a MachineMove (DWARF CFI) for each
/// callee-saved register spilled by the prologue, describing where it was
/// saved relative to the virtual frame pointer.  Label marks the position in
/// the instruction stream at which the moves take effect.
void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                 MCSymbol *Label,
                                                 unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = TM.getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate amount of bytes used for return address storing.
  int stackGrowth = -TD->getPointerSize();

  // FIXME: This is dirty hack. The code itself is pretty mess right now.
  // It should be rewritten from scratch and generalized sometimes.

  // Determine maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  // Skip the return address (and the saved frame pointer, if present) when
  // computing the save-area offset.
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    // another bug. I.e., one where we generate a prolog like this:
    //
    //     pushl  %ebp
    //     movl   %esp, %ebp
    //     pushl  %ebp
    //     pushl  %esi
    //     ...
    //
    // The immediate re-push of EBP is unnecessary. At the least, it's an
    // optimization bug. EBP can be used as a scratch register in certain
    // cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
  }
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to
/// allocate space for local variables.
/// Also emit labels used by the exception handler to generate the exception
/// handling frames.
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // Frame moves (CFI) are needed for debug info and for unwinding.
  bool needsFrameMoves = MMI.hasDebugInfo() ||
    !Fn->doesNotThrow() || UnwindTablesMandatory;
  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();   // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  bool IsWin64 = STI.isTargetWin64();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  DebugLoc DL;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out. Otherwise just make sure we have some alignment - we'll
  // go with the minimum SlotSize.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone).
  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->adjustsStack() &&                      // No calls.
      !IsWin64) {                                  // Win64 has no Red Zone
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  } else if (IsWin64) {
    // We need to always allocate 32 bytes as register spill area.
    // FIXME: We might reuse these 32 bytes for leaf functions.
    StackSize += 32;
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  uint64_t NumBytes = 0;
  int stackGrowth = -TD->getPointerSize();

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill);

    if (needsFrameMoves) {
      // Mark the place where EBP/RBP was saved.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      } else {
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Update EBP with the new base value...
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
        .addReg(StackPtr);

    if (needsFrameMoves) {
      // Mark effective beginning of when frame pointer becomes valid.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign stack
    if (RegInfo->needsStackRealignment(MF)) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
                StackPtr).addReg(StackPtr).addImm(-MaxAlign);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    ++MBBI;

    if (!HasFP && needsFrameMoves) {
      // Mark callee-saved push instruction.
      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

      // Define the current CFA rule to use the provided offset.
      unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr;
      MachineLocation SPDst(Ptr);
      MachineLocation SPSrc(Ptr, StackOffset);
      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      StackOffset += stackGrowth;
    }
  }

  DL = MBB.findDebugLoc(MBBI);

  // If there is an SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // If there is an ADD32ri or SUB32ri of ESP immediately after this
  // instruction, merge the two instructions.
  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
  // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go. The 64-bit version of
  // __chkstk is only responsible for probing the stack. The 64-bit prologue is
  // responsible for adjusting the stack pointer. Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  if (NumBytes >= 4096 &&
      (STI.isTargetCygMing() || STI.isTargetWin32()) &&
      !STI.isTargetEnvMacho()) {
    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    const char *StackProbeSymbol =
      STI.isTargetWindows() ? "_chkstk" : "_alloca";
    if (Is64Bit && STI.isTargetCygMing())
      StackProbeSymbol = "__chkstk";
    unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
    if (!isEAXAlive) {
      // The probe routine takes the allocation size in EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes);
      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
        .addExternalSymbol(StackProbeSymbol)
        .addReg(StackPtr, RegState::Define | RegState::Implicit)
        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
    } else {
      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill);

      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
      // allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes - 4);
      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
        .addExternalSymbol(StackProbeSymbol)
        .addReg(StackPtr, RegState::Define | RegState::Implicit)
        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes >= 4096 &&
             STI.isTargetWin64() &&
             !STI.isTargetEnvMacho()) {
    // Sanity check that EAX is not livein for this function. It should
    // not be, so throw an assert.
    assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");

    // Handle the 64-bit Windows ABI case where we need to call __chkstk.
    // Function prologue is responsible for adjusting the stack pointer.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
      .addImm(NumBytes);
    BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
      .addExternalSymbol("__chkstk")
      .addReg(StackPtr, RegState::Define | RegState::Implicit);
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 TII, *RegInfo);
  } else if (NumBytes)
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 TII, *RegInfo);

  if ((NumBytes || PushedRegs) && needsFrameMoves) {
    // Mark end of stack pointer adjustment.
    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              -StackSize + stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      } else {
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      }
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
  }
}

/// emitEpilogue - Undo the prologue: deallocate local stack space, restore
/// the frame pointer, and fix up the stack pointer for eh_return / tail-call
/// returns.
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out. Otherwise just make sure we have some alignment - we'll
  // go with the minimum.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else
      MaxAlign = MaxAlign ? MaxAlign : 4;
  }

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();

    // Stop at the first instruction that is neither a CSR pop, a debug
    // value, nor a terminator.
    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->getDesc().isTerminator())
      break;

    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF)) {
    // We cannot use LEA here, because stack pointer was realigned. We need to
    // deallocate local frame back.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
      MBBI = prior(LastCSPop);
    }

    // Restore ESP/RSP from the frame pointer.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      // Point ESP/RSP at the bottom of the callee-saved area: FramePtr-CSSize.
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI =
        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                     FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else {
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
  }

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    // Memory-form TCRETURNs carry a 5-operand address before the adjustment.
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj-MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                       ? X86::TAILJMPd : X86::TAILJMPd64));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      // Copy the 5-operand memory address from the pseudo instruction.
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Transfer any remaining operands (e.g. implicit uses) to the new jump.
    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
  }
}

/// getInitialFrameState - Record the frame moves that describe the CFA and
/// return address location at function entry.
void
X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
  // Calculate amount of bytes used for return address storing
  int stackGrowth = (STI.is64Bit() ?
                     -8 : -4);
  const X86RegisterInfo *RI = TM.getRegisterInfo();

  // Initial state of the frame pointer is esp+stackGrowth.
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(RI->getStackRegister(), stackGrowth);
  Moves.push_back(MachineMove(0, Dst, Src));

  // Add return address to move list
  MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
  MachineLocation CSSrc(RI->getRARegister());
  Moves.push_back(MachineMove(0, CSDst, CSSrc));
}

/// getFrameIndexOffset - Return the offset of frame index FI from the
/// reference register used to address it (stack or frame pointer).
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
  const X86RegisterInfo *RI =
    static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (RI->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      Offset += RI->getSlotSize();
    } else {
      unsigned Align = MFI->getObjectAlignment(FI);
      assert((-(Offset + StackSize)) % Align == 0);
      // Align is only needed for the assert; the dead store suppresses
      // "set but not used" warnings in release (NDEBUG) builds.
      Align = 0;
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += RI->getSlotSize();

    // Skip the RETADDR move area
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}

/// spillCalleeSavedRegisters - Emit the callee-saved register saves before
/// MI.  GPRs are pushed (cheapest form); everything else — and all CSRs on
/// Win64 — is stored to its assigned frame index.  Records the pushed-GPR
/// byte count in X86MachineFunctionInfo.  Returns true if any spill code
/// was emitted.
bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();

  bool isWin64 = STI.isTargetWin64();
  unsigned SlotSize = STI.is64Bit() ? 8 : 4;
  unsigned FPReg = TRI->getFrameRegister(MF);
  unsigned CalleeFrameSize = 0;

  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  // Iterate in reverse so pops in the epilogue come out in the right order.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    if (Reg == FPReg)
      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
      continue;
    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
      CalleeFrameSize += SlotSize;
      BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
    } else {
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
                              RC, TRI);
    }
  }

  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
  return true;
}

/// restoreCalleeSavedRegisters - Emit the callee-saved register reloads
/// before MI, mirroring spillCalleeSavedRegisters.
bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  unsigned FPReg = TRI->getFrameRegister(MF);
  bool isWin64 = STI.isTargetWin64();
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (Reg == FPReg)
      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
948 continue; 949 if (!X86::VR128RegClass.contains(Reg) && !isWin64) { 950 BuildMI(MBB, MI, DL, TII.get(Opc), Reg); 951 } else { 952 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 953 TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), 954 RC, TRI); 955 } 956 } 957 return true; 958} 959 960void 961X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 962 RegScavenger *RS) const { 963 MachineFrameInfo *MFI = MF.getFrameInfo(); 964 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 965 unsigned SlotSize = RegInfo->getSlotSize(); 966 967 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 968 int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 969 970 if (TailCallReturnAddrDelta < 0) { 971 // create RETURNADDR area 972 // arg 973 // arg 974 // RETADDR 975 // { ... 976 // RETADDR area 977 // ... 978 // } 979 // [EBP] 980 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 981 (-1U*SlotSize)+TailCallReturnAddrDelta, true); 982 } 983 984 if (hasFP(MF)) { 985 assert((TailCallReturnAddrDelta <= 0) && 986 "The Delta should always be zero or negative"); 987 const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); 988 989 // Create a frame entry for the EBP register that must be saved. 990 int FrameIdx = MFI->CreateFixedObject(SlotSize, 991 -(int)SlotSize + 992 TFI.getOffsetOfLocalArea() + 993 TailCallReturnAddrDelta, 994 true); 995 assert(FrameIdx == MFI->getObjectIndexBegin() && 996 "Slot for EBP register must be last in order to be found!"); 997 FrameIdx = 0; 998 } 999} 1000