// X86FrameLowering.cpp revision de77055a68a3fc495e01b682f00059af3e38822e
//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallSet.h"

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

// FIXME: Remove once linker support is available. The feature exists only on
// Darwin at the moment.
static cl::opt<bool>
GenerateCompactUnwind("gen-compact-unwind",
                      cl::desc("Generate compact unwind encoding"),
                      cl::Hidden);

/// hasReservedCallFrame - The call frame can be reserved (folded into the
/// fixed frame) only when the function has no variable sized allocas, since
/// those require dynamic stack pointer adjustments.
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
51bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 52 const MachineFrameInfo *MFI = MF.getFrameInfo(); 53 const MachineModuleInfo &MMI = MF.getMMI(); 54 const TargetRegisterInfo *RI = TM.getRegisterInfo(); 55 56 return (DisableFramePointerElim(MF) || 57 RI->needsStackRealignment(MF) || 58 MFI->hasVarSizedObjects() || 59 MFI->isFrameAddressTaken() || 60 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 61 MMI.callsUnwindInit()); 62} 63 64static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { 65 if (is64Bit) { 66 if (isInt<8>(Imm)) 67 return X86::SUB64ri8; 68 return X86::SUB64ri32; 69 } else { 70 if (isInt<8>(Imm)) 71 return X86::SUB32ri8; 72 return X86::SUB32ri; 73 } 74} 75 76static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { 77 if (is64Bit) { 78 if (isInt<8>(Imm)) 79 return X86::ADD64ri8; 80 return X86::ADD64ri32; 81 } else { 82 if (isInt<8>(Imm)) 83 return X86::ADD32ri8; 84 return X86::ADD32ri; 85 } 86} 87 88/// findDeadCallerSavedReg - Return a caller-saved register that isn't live 89/// when it reaches the "return" instruction. We can then pop a stack object 90/// to this register without worry about clobbering it. 
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  // EH-return lowering expects all registers to be preserved, so don't
  // scavenge one in that case.
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const unsigned CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX
  };

  static const unsigned CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8, X86::R9, X86::R10, X86::R11
  };

  // Only scan for a dead register at instructions we know the operand
  // conventions of (return-like terminators).
  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Collect every register (and its overlapping aliases) read by the
    // terminator; those are live and must not be clobbered.
    SmallSet<unsigned, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
        Uses.insert(*AsI);
    }

    // Return the first caller-saved register the terminator does not use.
    const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}


/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64Bit, const TargetInstrInfo &TII,
                  const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc = isSub ?
    getSUBriOpcode(Is64Bit, Offset) :
    getADDriOpcode(Is64Bit, Offset);
  // Largest adjustment a single ADD/SUB ri32 can express; bigger offsets are
  // split into multiple instructions by the loop below.
  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    if (ThisVal == (Is64Bit ? 8 : 4)) {
      // A one-slot adjustment can be done with a single push / pop, which is
      // smaller than an ADD/SUB. For a pop we need a register that is dead at
      // the terminator; for a push any register works (its value is unused).
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
      if (Reg) {
        Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
      .addReg(StackPtr)
      .addImm(ThisVal);
    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    Offset -= ThisVal;
  }
}

/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // Nothing precedes MBBI; there is nothing to merge with.
  if (MBBI == MBB.begin()) return;

  // If the previous instruction is an SP += imm or SP -= imm, fold its
  // immediate into *NumBytes and delete it.
  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower
/// iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  // NOTE(review): deliberately disabled -- everything below this return is
  // dead code. If it is ever re-enabled, note that MBBI is assigned from NI
  // *after* MBB.erase(NI), i.e. from an erased iterator; that would need
  // fixing first.
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB instruction of the stack pointer it is
/// deleted and the stack adjustment is returned as a positive value for ADD
/// and a negative for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  // PI is the instruction to inspect: the previous one when merging up, the
  // current one when merging down. NI is only needed to re-seat MBBI after
  // a downward erase.
  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr){
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

/// isEAXLiveIn - Return true if any sub- or super-register of EAX is a
/// live-in of the function.
static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                 MCSymbol *Label,
                                                 unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = TM.getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate amount of bytes used for return address storing.
  int stackGrowth = -TD->getPointerSize();

  // FIXME: This is dirty hack. The code itself is pretty mess right now.
  // It should be rewritten from scratch and generalized sometimes.

  // Determine maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets: rebase each CSR slot so offsets are expressed relative
  // to the save area (below the return address and, if present, the saved FP).
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    // another bug. I.e., one where we generate a prolog like this:
    //
    //     pushl  %ebp
    //     movl   %esp, %ebp
    //     pushl  %ebp
    //     pushl  %esi
    //      ...
    //
    // The immediate re-push of EBP is unnecessary. At the least, it's an
    // optimization bug. EBP can be used as a scratch register in certain
    // cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
  }
}

/// getCompactUnwindRegNum - Get the compact unwind number for a given
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h. Returns -1 if the register has no compact
/// unwind number (CURegs is a 0-terminated list; numbering starts at 1).
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
  int Idx = 1;
  for (; *CURegs; ++CURegs, ++Idx)
    if (*CURegs == Reg)
      return Idx;

  return -1;
}

/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
/// used with frameless stacks. It is passed the number of registers to be saved
/// and an array of the registers saved.
static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
                                                         unsigned RegCount,
                                                         bool Is64Bit) {
  // The saved registers are numbered from 1 to 6. In order to encode the order
  // in which they were saved, we re-number them according to their place in the
  // register order. The re-numbering is relative to the last re-numbered
  // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
  //
  //    Orig  Re-Num
  //    ----  ------
  //     6       6
  //     2       2
  //     4       3
  //     5       3
  //
  static const unsigned CU32BitRegs[] = {
    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
  };
  static const unsigned CU64BitRegs[] = {
    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };
  const unsigned *CURegs = (Is64Bit ?
                            CU64BitRegs : CU32BitRegs);

  // Renumber each saved register: its compact-unwind number minus the count
  // of earlier-saved registers with smaller numbers, biased by 1.
  uint32_t RenumRegs[6];
  for (unsigned i = 6 - RegCount; i < 6; ++i) {
    int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
    if (CUReg == -1) return ~0U;  // Register not encodable.
    SavedRegs[i] = CUReg;

    unsigned Countless = 0;
    for (unsigned j = 6 - RegCount; j < i; ++j)
      if (SavedRegs[j] < SavedRegs[i])
        ++Countless;

    RenumRegs[i] = SavedRegs[i] - Countless - 1;
  }

  // Take the renumbered values and encode them into a 10-bit number.
  uint32_t permutationEncoding = 0;
  switch (RegCount) {
  case 6:
    permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                           + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                           +     RenumRegs[4];
    break;
  case 5:
    permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                           + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                           +     RenumRegs[5];
    break;
  case 4:
    permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
                           + 3 * RenumRegs[4] +      RenumRegs[5];
    break;
  case 3:
    permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
                           +     RenumRegs[5];
    break;
  case 2:
    permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
    break;
  case 1:
    permutationEncoding |=       RenumRegs[5];
    break;
  }

  assert((permutationEncoding & 0x3FF) == permutationEncoding &&
         "Invalid compact register encoding!");
  return permutationEncoding;
}

/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
/// compact encoding with a frame pointer.
static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
                                                      bool Is64Bit) {
  static const unsigned CU32BitRegs[] = {
    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
  };
  static const unsigned CU64BitRegs[] = {
    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };
  const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);

  // Encode the registers in the order they were saved, 3-bits per register. The
  // registers are numbered from 1 to 6.
  //
  // NOTE(review): the shift below advances by only one bit per slot while
  // each field is 3 bits wide, so adjacent register fields overlap. Verify
  // against compact_unwind_encoding.h -- the expected shift looks like
  // 3 * (5 - I) (or an equivalent packed layout).
  uint32_t RegEnc = 0;
  for (int I = 5; I >= 0; --I) {
    unsigned Reg = SavedRegs[I];
    if (Reg == 0) break;  // Slots below the first saved register are empty.
    int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
    if (CURegNum == -1)
      return ~0U;
    RegEnc |= (CURegNum & 0x7) << (5 - I);
  }

  assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
  return RegEnc;
}

uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  bool Is64Bit = STI.is64Bit();
  bool HasFP = hasFP(MF);

  // Registers pushed by the prologue, in slot order; SavedRegIdx counts down
  // from 6 as pushes are seen.
  unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
  int SavedRegIdx = 6;

  unsigned OffsetSize = (Is64Bit ? 8 : 4);

  // Expected prologue instructions and their byte sizes, used to compute the
  // instruction offset of the SUB for the large-frameless encoding.
  unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
  unsigned PushInstrSize = 1;
  unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
  unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
  unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
  unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);

  unsigned StackDivide = (Is64Bit ? 8 : 4);

  unsigned InstrOffset = 0;
  unsigned CFAOffset = 0;
  unsigned StackAdjust = 0;

  MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
  bool ExpectEnd = false;
  // Walk the FrameSetup-flagged prologue instructions, recognizing only the
  // push / mov / sub shapes that compact unwind can describe. Anything else
  // forces a bail-out (return 0 = no compact encoding).
  for (MachineBasicBlock::iterator
         MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
    MachineInstr &MI = *MBBI;
    unsigned Opc = MI.getOpcode();
    if (Opc == X86::PROLOG_LABEL) continue;
    if (!MI.getFlag(MachineInstr::FrameSetup)) break;

    // We don't expect any more prolog instructions.
    if (ExpectEnd) return 0;

    if (Opc == PushInstr) {
      // If there are too many saved registers, we cannot use compact encoding.
      if (--SavedRegIdx < 0) return 0;

      SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
      CFAOffset += OffsetSize;
      InstrOffset += PushInstrSize;
    } else if (Opc == MoveInstr) {
      unsigned SrcReg = MI.getOperand(1).getReg();
      unsigned DstReg = MI.getOperand(0).getReg();

      if (DstReg != FramePtr || SrcReg != StackPtr)
        return 0;

      // FP = SP establishes the frame; pushes before it are re-described by
      // the with-frame encoding, so reset the bookkeeping.
      CFAOffset = 0;
      memset(SavedRegs, 0, sizeof(SavedRegs));
      InstrOffset += MoveInstrSize;
    } else if (Opc == SubtractInstr) {
      if (StackAdjust)
        // We already have a stack pointer adjustment.
        return 0;

      if (!MI.getOperand(0).isReg() ||
          MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
          MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
        // We need this to be a stack adjustment pointer. Something like:
        //
        //   %RSP<def> = SUB64ri8 %RSP, 48
        return 0;

      StackAdjust = MI.getOperand(2).getImm() / StackDivide;
      SubtractInstrIdx += InstrOffset;
      ExpectEnd = true;
    }
  }

  // Encode that we are using EBP/RBP as the frame pointer.
  uint32_t CompactUnwindEncoding = 0;
  CFAOffset /= StackDivide;
  if (HasFP) {
    if ((CFAOffset & 0xFF) != CFAOffset)
      // Offset was too big for compact encoding.
      return 0;

    // Get the encoding of the saved registers when we have a frame pointer.
    uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
    if (RegEnc == ~0U)
      return 0;

    // Frame-pointer mode: mode bits | FP offset | register permutation.
    CompactUnwindEncoding |= 0x01000000;
    CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
    CompactUnwindEncoding |= RegEnc & 0x7FFF;
  } else {
    unsigned FullOffset = CFAOffset + StackAdjust;
    if ((FullOffset & 0xFF) == FullOffset) {
      // Frameless stack.
      CompactUnwindEncoding |= 0x02000000;
      CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
    } else {
      if ((CFAOffset & 0x7) != CFAOffset)
        // The extra stack adjustments are too big for us to handle.
        return 0;

      // Frameless stack with an offset too large for us to encode compactly.
      CompactUnwindEncoding |= 0x03000000;

      // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
      // instruction.
      CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

      // Encode any extra stack changes (done via push instructions).
      CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
    }

    // Get the encoding of the saved registers when we don't have a frame
    // pointer.
    uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
                                                               6 - SavedRegIdx,
                                                               Is64Bit);
    if (RegEnc == ~0U) return 0;
    CompactUnwindEncoding |= RegEnc & 0x3FF;
  }

  return CompactUnwindEncoding;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() ||
    Fn->needsUnwindTableEntry();
  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  bool IsWin64 = STI.isTargetWin64();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();
  DebugLoc DL;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out. Otherwise just make sure we have some alignment - we'll
  // go with the minimum SlotSize.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone).
  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->adjustsStack() &&                      // No calls.
      !IsWin64) {                                  // Win64 has no Red Zone
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the callers.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  uint64_t NumBytes = 0;
  int stackGrowth = -TD->getPointerSize();

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (needsFrameMoves) {
      // Mark the place where EBP/RBP was saved.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
        .addSym(FrameLabel);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      } else {
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Update EBP with the new base value.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

    if (needsFrameMoves) {
      // Mark effective beginning of when frame pointer becomes valid.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
        .addSym(FrameLabel);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign stack
    if (RegInfo->needsStackRealignment(MF)) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
          .addReg(StackPtr)
          .addImm(-MaxAlign)
          .setMIFlag(MachineInstr::FrameSetup);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    MBBI->setFlag(MachineInstr::FrameSetup);
    ++MBBI;

    if (!HasFP && needsFrameMoves) {
      // Mark callee-saved push instruction.
      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

      // Define the current CFA rule to use the provided offset.
      unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr;
      MachineLocation SPDst(Ptr);
      MachineLocation SPSrc(Ptr, StackOffset);
      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      StackOffset += stackGrowth;
    }
  }

  DL = MBB.findDebugLoc(MBBI);

  // If there is an SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // If there is an ADD32ri or SUB32ri of ESP immediately after this
  // instruction, merge the two instructions.
  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
    const char *StackProbeSymbol;
    bool isSPUpdateNeeded = false;

    if (Is64Bit) {
      if (STI.isTargetCygMing())
        StackProbeSymbol = "___chkstk";
      else {
        StackProbeSymbol = "__chkstk";
        isSPUpdateNeeded = true;
      }
    } else if (STI.isTargetCygMing())
      StackProbeSymbol = "_alloca";
    else
      StackProbeSymbol = "_chkstk";

    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // EAX is used to pass the allocation size to the probe function, so a
      // live-in EAX must be preserved around the call. The 64-bit path uses
      // RAX and does not support a live-in EAX.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(StackPtr,    RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
      .setMIFlag(MachineInstr::FrameSetup);

    // MSVC x64's __chkstk needs to adjust %rsp.
    // FIXME: %rax preserves the offset and should be available.
    if (isSPUpdateNeeded)
      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                   TII, *RegInfo);

    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes)
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 TII, *RegInfo);

  if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
    // Mark end of stack pointer adjustment.
    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
      .addSym(Label);

    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              -StackSize + stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      } else {
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      }
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
  }

  // Darwin 10.7 and greater has support for compact unwind encoding.
  // NOTE(review): the version check below accepts 10.6 and later -- confirm
  // whether the comment or the (10, 6) bound is the intended one.
  if (GenerateCompactUnwind &&
      STI.isTargetDarwin() && !STI.getTargetTriple().isMacOSXVersionLT(10, 6))
    MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign  = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out.  Otherwise just make sure we have some alignment - we'll
  // go with the minimum.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else
      MaxAlign = MaxAlign ? MaxAlign : 4;
  }

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->getDesc().isTerminator())
      break;

    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF)) {
    // We cannot use LEA here, because stack pointer was realigned. We need to
    // deallocate local frame back.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
      MBBI = prior(LastCSPop);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      unsigned Opc = Is64Bit ?
X86::LEA64r : X86::LEA32r; 1015 MachineInstr *MI = 1016 addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), 1017 FramePtr, false, -CSSize); 1018 MBB.insert(MBBI, MI); 1019 } else { 1020 BuildMI(MBB, MBBI, DL, 1021 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) 1022 .addReg(FramePtr); 1023 } 1024 } else if (NumBytes) { 1025 // Adjust stack pointer back: ESP += numbytes. 1026 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); 1027 } 1028 1029 // We're returning from function via eh_return. 1030 if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { 1031 MBBI = MBB.getLastNonDebugInstr(); 1032 MachineOperand &DestAddr = MBBI->getOperand(0); 1033 assert(DestAddr.isReg() && "Offset should be in register!"); 1034 BuildMI(MBB, MBBI, DL, 1035 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), 1036 StackPtr).addReg(DestAddr.getReg()); 1037 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || 1038 RetOpcode == X86::TCRETURNmi || 1039 RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || 1040 RetOpcode == X86::TCRETURNmi64) { 1041 bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; 1042 // Tail call return: adjust the stack pointer and jump to callee. 1043 MBBI = MBB.getLastNonDebugInstr(); 1044 MachineOperand &JumpTarget = MBBI->getOperand(0); 1045 MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); 1046 assert(StackAdjust.isImm() && "Expecting immediate value."); 1047 1048 // Adjust stack pointer. 1049 int StackAdj = StackAdjust.getImm(); 1050 int MaxTCDelta = X86FI->getTCReturnAddrDelta(); 1051 int Offset = 0; 1052 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); 1053 1054 // Incoporate the retaddr area. 1055 Offset = StackAdj-MaxTCDelta; 1056 assert(Offset >= 0 && "Offset should never be negative"); 1057 1058 if (Offset) { 1059 // Check for possible merge with preceding ADD instruction. 
1060 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); 1061 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo); 1062 } 1063 1064 // Jump to label or value in register. 1065 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { 1066 MachineInstrBuilder MIB = 1067 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) 1068 ? X86::TAILJMPd : X86::TAILJMPd64)); 1069 if (JumpTarget.isGlobal()) 1070 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 1071 JumpTarget.getTargetFlags()); 1072 else { 1073 assert(JumpTarget.isSymbol()); 1074 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 1075 JumpTarget.getTargetFlags()); 1076 } 1077 } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { 1078 MachineInstrBuilder MIB = 1079 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) 1080 ? X86::TAILJMPm : X86::TAILJMPm64)); 1081 for (unsigned i = 0; i != 5; ++i) 1082 MIB.addOperand(MBBI->getOperand(i)); 1083 } else if (RetOpcode == X86::TCRETURNri64) { 1084 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). 1085 addReg(JumpTarget.getReg(), RegState::Kill); 1086 } else { 1087 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). 1088 addReg(JumpTarget.getReg(), RegState::Kill); 1089 } 1090 1091 MachineInstr *NewMI = prior(MBBI); 1092 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) 1093 NewMI->addOperand(MBBI->getOperand(i)); 1094 1095 // Delete the pseudo instruction TCRETURN. 1096 MBB.erase(MBBI); 1097 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && 1098 (X86FI->getTCReturnAddrDelta() < 0)) { 1099 // Add the return addr area delta back since we are not tail calling. 1100 int delta = -1*X86FI->getTCReturnAddrDelta(); 1101 MBBI = MBB.getLastNonDebugInstr(); 1102 1103 // Check for possible merge with preceding ADD instruction. 
1104 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); 1105 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo); 1106 } 1107} 1108 1109int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { 1110 const X86RegisterInfo *RI = 1111 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); 1112 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1113 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 1114 uint64_t StackSize = MFI->getStackSize(); 1115 1116 if (RI->needsStackRealignment(MF)) { 1117 if (FI < 0) { 1118 // Skip the saved EBP. 1119 Offset += RI->getSlotSize(); 1120 } else { 1121 unsigned Align = MFI->getObjectAlignment(FI); 1122 assert((-(Offset + StackSize)) % Align == 0); 1123 Align = 0; 1124 return Offset + StackSize; 1125 } 1126 // FIXME: Support tail calls 1127 } else { 1128 if (!hasFP(MF)) 1129 return Offset + StackSize; 1130 1131 // Skip the saved EBP. 1132 Offset += RI->getSlotSize(); 1133 1134 // Skip the RETADDR move area 1135 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1136 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 1137 if (TailCallReturnAddrDelta < 0) 1138 Offset -= TailCallReturnAddrDelta; 1139 } 1140 1141 return Offset; 1142} 1143 1144bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 1145 MachineBasicBlock::iterator MI, 1146 const std::vector<CalleeSavedInfo> &CSI, 1147 const TargetRegisterInfo *TRI) const { 1148 if (CSI.empty()) 1149 return false; 1150 1151 DebugLoc DL = MBB.findDebugLoc(MI); 1152 1153 MachineFunction &MF = *MBB.getParent(); 1154 1155 unsigned SlotSize = STI.is64Bit() ? 8 : 4; 1156 unsigned FPReg = TRI->getFrameRegister(MF); 1157 unsigned CalleeFrameSize = 0; 1158 1159 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 1160 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1161 1162 // Push GPRs. It increases frame size. 1163 unsigned Opc = STI.is64Bit() ? 
X86::PUSH64r : X86::PUSH32r; 1164 for (unsigned i = CSI.size(); i != 0; --i) { 1165 unsigned Reg = CSI[i-1].getReg(); 1166 if (!X86::GR64RegClass.contains(Reg) && 1167 !X86::GR32RegClass.contains(Reg)) 1168 continue; 1169 // Add the callee-saved register as live-in. It's killed at the spill. 1170 MBB.addLiveIn(Reg); 1171 if (Reg == FPReg) 1172 // X86RegisterInfo::emitPrologue will handle spilling of frame register. 1173 continue; 1174 CalleeFrameSize += SlotSize; 1175 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) 1176 .setMIFlag(MachineInstr::FrameSetup); 1177 } 1178 1179 X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 1180 1181 // Make XMM regs spilled. X86 does not have ability of push/pop XMM. 1182 // It can be done by spilling XMMs to stack frame. 1183 // Note that only Win64 ABI might spill XMMs. 1184 for (unsigned i = CSI.size(); i != 0; --i) { 1185 unsigned Reg = CSI[i-1].getReg(); 1186 if (X86::GR64RegClass.contains(Reg) || 1187 X86::GR32RegClass.contains(Reg)) 1188 continue; 1189 // Add the callee-saved register as live-in. It's killed at the spill. 1190 MBB.addLiveIn(Reg); 1191 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1192 TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), 1193 RC, TRI); 1194 } 1195 1196 return true; 1197} 1198 1199bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 1200 MachineBasicBlock::iterator MI, 1201 const std::vector<CalleeSavedInfo> &CSI, 1202 const TargetRegisterInfo *TRI) const { 1203 if (CSI.empty()) 1204 return false; 1205 1206 DebugLoc DL = MBB.findDebugLoc(MI); 1207 1208 MachineFunction &MF = *MBB.getParent(); 1209 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 1210 1211 // Reload XMMs from stack frame. 
1212 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1213 unsigned Reg = CSI[i].getReg(); 1214 if (X86::GR64RegClass.contains(Reg) || 1215 X86::GR32RegClass.contains(Reg)) 1216 continue; 1217 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1218 TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), 1219 RC, TRI); 1220 } 1221 1222 // POP GPRs. 1223 unsigned FPReg = TRI->getFrameRegister(MF); 1224 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; 1225 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1226 unsigned Reg = CSI[i].getReg(); 1227 if (!X86::GR64RegClass.contains(Reg) && 1228 !X86::GR32RegClass.contains(Reg)) 1229 continue; 1230 if (Reg == FPReg) 1231 // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 1232 continue; 1233 BuildMI(MBB, MI, DL, TII.get(Opc), Reg); 1234 } 1235 return true; 1236} 1237 1238void 1239X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 1240 RegScavenger *RS) const { 1241 MachineFrameInfo *MFI = MF.getFrameInfo(); 1242 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 1243 unsigned SlotSize = RegInfo->getSlotSize(); 1244 1245 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1246 int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 1247 1248 if (TailCallReturnAddrDelta < 0) { 1249 // create RETURNADDR area 1250 // arg 1251 // arg 1252 // RETADDR 1253 // { ... 1254 // RETADDR area 1255 // ... 1256 // } 1257 // [EBP] 1258 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 1259 (-1U*SlotSize)+TailCallReturnAddrDelta, true); 1260 } 1261 1262 if (hasFP(MF)) { 1263 assert((TailCallReturnAddrDelta <= 0) && 1264 "The Delta should always be zero or negative"); 1265 const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); 1266 1267 // Create a frame entry for the EBP register that must be saved. 
1268 int FrameIdx = MFI->CreateFixedObject(SlotSize, 1269 -(int)SlotSize + 1270 TFI.getOffsetOfLocalArea() + 1271 TailCallReturnAddrDelta, 1272 true); 1273 assert(FrameIdx == MFI->getObjectIndexBegin() && 1274 "Slot for EBP register must be last in order to be found!"); 1275 FrameIdx = 0; 1276 } 1277} 1278