X86RegisterInfo.cpp revision 80c76436fe22a5481fac2cafe3c0a652fa6ddb31
1//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the X86 implementation of the TargetRegisterInfo class. 11// This file is responsible for the frame pointer elimination optimization 12// on X86. 13// 14//===----------------------------------------------------------------------===// 15 16#include "X86.h" 17#include "X86RegisterInfo.h" 18#include "X86InstrBuilder.h" 19#include "X86MachineFunctionInfo.h" 20#include "X86Subtarget.h" 21#include "X86TargetMachine.h" 22#include "llvm/Constants.h" 23#include "llvm/Function.h" 24#include "llvm/Type.h" 25#include "llvm/CodeGen/ValueTypes.h" 26#include "llvm/CodeGen/MachineInstrBuilder.h" 27#include "llvm/CodeGen/MachineFunction.h" 28#include "llvm/CodeGen/MachineFunctionPass.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineLocation.h" 31#include "llvm/CodeGen/MachineModuleInfo.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/Target/TargetAsmInfo.h" 34#include "llvm/Target/TargetFrameInfo.h" 35#include "llvm/Target/TargetInstrInfo.h" 36#include "llvm/Target/TargetMachine.h" 37#include "llvm/Target/TargetOptions.h" 38#include "llvm/ADT/BitVector.h" 39#include "llvm/ADT/STLExtras.h" 40#include "llvm/Support/CommandLine.h" 41#include "llvm/Support/Compiler.h" 42#include "llvm/Support/ErrorHandling.h" 43using namespace llvm; 44 45X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, 46 const TargetInstrInfo &tii) 47 : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ? 48 X86::ADJCALLSTACKDOWN64 : 49 X86::ADJCALLSTACKDOWN32, 50 tm.getSubtarget<X86Subtarget>().is64Bit() ? 51 X86::ADJCALLSTACKUP64 : 52 X86::ADJCALLSTACKUP32), 53 TM(tm), TII(tii) { 54 // Cache some information. 55 const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); 56 Is64Bit = Subtarget->is64Bit(); 57 IsWin64 = Subtarget->isTargetWin64(); 58 StackAlign = TM.getFrameInfo()->getStackAlignment(); 59 60 if (Is64Bit) { 61 SlotSize = 8; 62 StackPtr = X86::RSP; 63 FramePtr = X86::RBP; 64 } else { 65 SlotSize = 4; 66 StackPtr = X86::ESP; 67 FramePtr = X86::EBP; 68 } 69} 70 71/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF 72/// specific numbering, used in debug info and exception tables. 73int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { 74 const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); 75 unsigned Flavour = DWARFFlavour::X86_64; 76 77 if (!Subtarget->is64Bit()) { 78 if (Subtarget->isTargetDarwin()) { 79 if (isEH) 80 Flavour = DWARFFlavour::X86_32_DarwinEH; 81 else 82 Flavour = DWARFFlavour::X86_32_Generic; 83 } else if (Subtarget->isTargetCygMing()) { 84 // Unsupported by now, just quick fallback 85 Flavour = DWARFFlavour::X86_32_Generic; 86 } else { 87 Flavour = DWARFFlavour::X86_32_Generic; 88 } 89 } 90 91 return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour); 92} 93 94/// getX86RegNum - This function maps LLVM register identifiers to their X86 95/// specific numbering, which is used in various places encoding instructions. 96unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { 97 switch(RegNo) { 98 case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; 99 case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; 100 case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; 101 case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; 102 case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: 103 return N86::ESP; 104 case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: 105 return N86::EBP; 106 case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: 107 return N86::ESI; 108 case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: 109 return N86::EDI; 110 111 case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: 112 return N86::EAX; 113 case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: 114 return N86::ECX; 115 case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: 116 return N86::EDX; 117 case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: 118 return N86::EBX; 119 case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: 120 return N86::ESP; 121 case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: 122 return N86::EBP; 123 case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: 124 return N86::ESI; 125 case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: 126 return N86::EDI; 127 128 case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: 129 case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: 130 return RegNo-X86::ST0; 131 132 case X86::XMM0: case X86::XMM8: case X86::MM0: 133 return 0; 134 case X86::XMM1: case X86::XMM9: case X86::MM1: 135 return 1; 136 case X86::XMM2: case X86::XMM10: case X86::MM2: 137 return 2; 138 case X86::XMM3: case X86::XMM11: case X86::MM3: 139 return 3; 140 case X86::XMM4: case X86::XMM12: case X86::MM4: 141 return 4; 142 case X86::XMM5: case X86::XMM13: case X86::MM5: 143 return 5; 144 case X86::XMM6: case X86::XMM14: case X86::MM6: 145 return 6; 146 case X86::XMM7: case X86::XMM15: case X86::MM7: 147 return 7; 148 149 default: 150 assert(isVirtualRegister(RegNo) && "Unknown physical register!"); 151 llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); 152 return 0; 153 } 154} 155 156const TargetRegisterClass * 157X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, 158 const TargetRegisterClass *B, 159 unsigned SubIdx) const { 160 switch (SubIdx) { 161 default: return 0; 162 case 1: 163 // 8-bit 164 if (B == &X86::GR8RegClass) { 165 if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) 166 return A; 167 } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) { 168 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || 169 A == &X86::GR64_NOREXRegClass || 170 A == &X86::GR64_NOSPRegClass || 171 A == &X86::GR64_NOREX_NOSPRegClass) 172 return &X86::GR64_ABCDRegClass; 173 else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || 174 A == &X86::GR32_NOREXRegClass || 175 A == &X86::GR32_NOSPRegClass) 176 return &X86::GR32_ABCDRegClass; 177 else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || 178 A == &X86::GR16_NOREXRegClass) 179 return &X86::GR16_ABCDRegClass; 180 } else if (B == &X86::GR8_NOREXRegClass) { 181 if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || 182 A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) 183 return &X86::GR64_NOREXRegClass; 184 else if (A == &X86::GR64_ABCDRegClass) 185 return &X86::GR64_ABCDRegClass; 186 else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || 187 A == &X86::GR32_NOSPRegClass) 188 return &X86::GR32_NOREXRegClass; 189 else if (A == &X86::GR32_ABCDRegClass) 190 return &X86::GR32_ABCDRegClass; 191 else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass) 192 return &X86::GR16_NOREXRegClass; 193 else if (A == &X86::GR16_ABCDRegClass) 194 return &X86::GR16_ABCDRegClass; 195 } 196 break; 197 case 2: 198 // 8-bit hi 199 if (B == &X86::GR8_ABCD_HRegClass) { 200 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || 201 A == &X86::GR64_NOREXRegClass || 202 A == &X86::GR64_NOSPRegClass || 203 A == &X86::GR64_NOREX_NOSPRegClass) 204 return &X86::GR64_ABCDRegClass; 205 else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || 206 A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) 207 return &X86::GR32_ABCDRegClass; 208 else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || 209 A == &X86::GR16_NOREXRegClass) 210 return &X86::GR16_ABCDRegClass; 211 } 212 break; 213 case 3: 214 // 16-bit 215 if (B == &X86::GR16RegClass) { 216 if (A->getSize() == 4 || A->getSize() == 8) 217 return A; 218 } else if (B == &X86::GR16_ABCDRegClass) { 219 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || 220 A == &X86::GR64_NOREXRegClass || 221 A == &X86::GR64_NOSPRegClass || 222 A == &X86::GR64_NOREX_NOSPRegClass) 223 return &X86::GR64_ABCDRegClass; 224 else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || 225 A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) 226 return &X86::GR32_ABCDRegClass; 227 } else if (B == &X86::GR16_NOREXRegClass) { 228 if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || 229 A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) 230 return &X86::GR64_NOREXRegClass; 231 else if (A == &X86::GR64_ABCDRegClass) 232 return &X86::GR64_ABCDRegClass; 233 else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || 234 A == &X86::GR32_NOSPRegClass) 235 return &X86::GR32_NOREXRegClass; 236 else if (A == &X86::GR32_ABCDRegClass) 237 return &X86::GR64_ABCDRegClass; 238 } 239 break; 240 case 4: 241 // 32-bit 242 if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { 243 if (A->getSize() == 8) 244 return A; 245 } else if (B == &X86::GR32_ABCDRegClass) { 246 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || 247 A == &X86::GR64_NOREXRegClass || 248 A == &X86::GR64_NOSPRegClass || 249 A == &X86::GR64_NOREX_NOSPRegClass) 250 return &X86::GR64_ABCDRegClass; 251 } else if (B == &X86::GR32_NOREXRegClass) { 252 if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || 253 A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) 254 return &X86::GR64_NOREXRegClass; 255 else if (A == &X86::GR64_ABCDRegClass) 256 return &X86::GR64_ABCDRegClass; 257 } 258 break; 259 } 260 return 0; 261} 262 263const TargetRegisterClass * 264X86RegisterInfo::getPointerRegClass(unsigned Kind) const { 265 switch (Kind) { 266 default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); 267 case 0: // Normal GPRs. 268 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 269 return &X86::GR64RegClass; 270 return &X86::GR32RegClass; 271 case 1: // Normal GRPs except the stack pointer (for encoding reasons). 272 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 273 return &X86::GR64_NOSPRegClass; 274 return &X86::GR32_NOSPRegClass; 275 } 276} 277 278const TargetRegisterClass * 279X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { 280 if (RC == &X86::CCRRegClass) { 281 if (Is64Bit) 282 return &X86::GR64RegClass; 283 else 284 return &X86::GR32RegClass; 285 } 286 return NULL; 287} 288 289const unsigned * 290X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { 291 bool callsEHReturn = false; 292 293 if (MF) { 294 const MachineFrameInfo *MFI = MF->getFrameInfo(); 295 const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); 296 callsEHReturn = (MMI ? MMI->callsEHReturn() : false); 297 } 298 299 static const unsigned CalleeSavedRegs32Bit[] = { 300 X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 301 }; 302 303 static const unsigned CalleeSavedRegs32EHRet[] = { 304 X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 305 }; 306 307 static const unsigned CalleeSavedRegs64Bit[] = { 308 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 309 }; 310 311 static const unsigned CalleeSavedRegs64EHRet[] = { 312 X86::RAX, X86::RDX, X86::RBX, X86::R12, 313 X86::R13, X86::R14, X86::R15, X86::RBP, 0 314 }; 315 316 static const unsigned CalleeSavedRegsWin64[] = { 317 X86::RBX, X86::RBP, X86::RDI, X86::RSI, 318 X86::R12, X86::R13, X86::R14, X86::R15, 319 X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9, 320 X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, 321 X86::XMM14, X86::XMM15, 0 322 }; 323 324 if (Is64Bit) { 325 if (IsWin64) 326 return CalleeSavedRegsWin64; 327 else 328 return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit); 329 } else { 330 return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit); 331 } 332} 333 334const TargetRegisterClass* const* 335X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { 336 bool callsEHReturn = false; 337 338 if (MF) { 339 const MachineFrameInfo *MFI = MF->getFrameInfo(); 340 const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); 341 callsEHReturn = (MMI ? MMI->callsEHReturn() : false); 342 } 343 344 static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { 345 &X86::GR32RegClass, &X86::GR32RegClass, 346 &X86::GR32RegClass, &X86::GR32RegClass, 0 347 }; 348 static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = { 349 &X86::GR32RegClass, &X86::GR32RegClass, 350 &X86::GR32RegClass, &X86::GR32RegClass, 351 &X86::GR32RegClass, &X86::GR32RegClass, 0 352 }; 353 static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = { 354 &X86::GR64RegClass, &X86::GR64RegClass, 355 &X86::GR64RegClass, &X86::GR64RegClass, 356 &X86::GR64RegClass, &X86::GR64RegClass, 0 357 }; 358 static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = { 359 &X86::GR64RegClass, &X86::GR64RegClass, 360 &X86::GR64RegClass, &X86::GR64RegClass, 361 &X86::GR64RegClass, &X86::GR64RegClass, 362 &X86::GR64RegClass, &X86::GR64RegClass, 0 363 }; 364 static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = { 365 &X86::GR64RegClass, &X86::GR64RegClass, 366 &X86::GR64RegClass, &X86::GR64RegClass, 367 &X86::GR64RegClass, &X86::GR64RegClass, 368 &X86::GR64RegClass, &X86::GR64RegClass, 369 &X86::VR128RegClass, &X86::VR128RegClass, 370 &X86::VR128RegClass, &X86::VR128RegClass, 371 &X86::VR128RegClass, &X86::VR128RegClass, 372 &X86::VR128RegClass, &X86::VR128RegClass, 373 &X86::VR128RegClass, &X86::VR128RegClass, 0 374 }; 375 376 if (Is64Bit) { 377 if (IsWin64) 378 return CalleeSavedRegClassesWin64; 379 else 380 return (callsEHReturn ? 381 CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit); 382 } else { 383 return (callsEHReturn ? 384 CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit); 385 } 386} 387 388BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { 389 BitVector Reserved(getNumRegs()); 390 // Set the stack-pointer register and its aliases as reserved. 391 Reserved.set(X86::RSP); 392 Reserved.set(X86::ESP); 393 Reserved.set(X86::SP); 394 Reserved.set(X86::SPL); 395 396 // Set the frame-pointer register and its aliases as reserved if needed. 397 if (hasFP(MF)) { 398 Reserved.set(X86::RBP); 399 Reserved.set(X86::EBP); 400 Reserved.set(X86::BP); 401 Reserved.set(X86::BPL); 402 } 403 404 // Mark the x87 stack registers as reserved, since they don't behave normally 405 // with respect to liveness. We don't fully model the effects of x87 stack 406 // pushes and pops after stackification. 407 Reserved.set(X86::ST0); 408 Reserved.set(X86::ST1); 409 Reserved.set(X86::ST2); 410 Reserved.set(X86::ST3); 411 Reserved.set(X86::ST4); 412 Reserved.set(X86::ST5); 413 Reserved.set(X86::ST6); 414 Reserved.set(X86::ST7); 415 return Reserved; 416} 417 418//===----------------------------------------------------------------------===// 419// Stack Frame Processing methods 420//===----------------------------------------------------------------------===// 421 422static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { 423 unsigned MaxAlign = 0; 424 425 for (int i = FFI->getObjectIndexBegin(), 426 e = FFI->getObjectIndexEnd(); i != e; ++i) { 427 if (FFI->isDeadObjectIndex(i)) 428 continue; 429 430 unsigned Align = FFI->getObjectAlignment(i); 431 MaxAlign = std::max(MaxAlign, Align); 432 } 433 434 return MaxAlign; 435} 436 437/// hasFP - Return true if the specified function should have a dedicated frame 438/// pointer register. This is true if the function has variable sized allocas 439/// or if frame pointer elimination is disabled. 440bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { 441 const MachineFrameInfo *MFI = MF.getFrameInfo(); 442 const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); 443 444 return (NoFramePointerElim || 445 needsStackRealignment(MF) || 446 MFI->hasVarSizedObjects() || 447 MFI->isFrameAddressTaken() || 448 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 449 (MMI && MMI->callsUnwindInit())); 450} 451 452bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { 453 const MachineFrameInfo *MFI = MF.getFrameInfo(); 454 455 // FIXME: Currently we don't support stack realignment for functions with 456 // variable-sized allocas 457 return (RealignStack && 458 (MFI->getMaxAlignment() > StackAlign && 459 !MFI->hasVarSizedObjects())); 460} 461 462bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { 463 return !MF.getFrameInfo()->hasVarSizedObjects(); 464} 465 466bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, 467 int &FrameIdx) const { 468 if (Reg == FramePtr && hasFP(MF)) { 469 FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); 470 return true; 471 } 472 return false; 473} 474 475int 476X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { 477 const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); 478 MachineFrameInfo *MFI = MF.getFrameInfo(); 479 int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); 480 uint64_t StackSize = MFI->getStackSize(); 481 482 if (needsStackRealignment(MF)) { 483 if (FI < 0) { 484 // Skip the saved EBP. 485 Offset += SlotSize; 486 } else { 487 unsigned Align = MFI->getObjectAlignment(FI); 488 assert( (-(Offset + StackSize)) % Align == 0); 489 Align = 0; 490 return Offset + StackSize; 491 } 492 // FIXME: Support tail calls 493 } else { 494 if (!hasFP(MF)) 495 return Offset + StackSize; 496 497 // Skip the saved EBP. 498 Offset += SlotSize; 499 500 // Skip the RETADDR move area 501 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 502 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 503 if (TailCallReturnAddrDelta < 0) 504 Offset -= TailCallReturnAddrDelta; 505 } 506 507 return Offset; 508} 509 510void X86RegisterInfo:: 511eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 512 MachineBasicBlock::iterator I) const { 513 if (!hasReservedCallFrame(MF)) { 514 // If the stack pointer can be changed after prologue, turn the 515 // adjcallstackup instruction into a 'sub ESP, <amt>' and the 516 // adjcallstackdown instruction into 'add ESP, <amt>' 517 // TODO: consider using push / pop instead of sub + store / add 518 MachineInstr *Old = I; 519 uint64_t Amount = Old->getOperand(0).getImm(); 520 if (Amount != 0) { 521 // We need to keep the stack aligned properly. To do this, we round the 522 // amount of space needed for the outgoing arguments up to the next 523 // alignment boundary. 524 Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; 525 526 MachineInstr *New = 0; 527 if (Old->getOpcode() == getCallFrameSetupOpcode()) { 528 New = BuildMI(MF, Old->getDebugLoc(), 529 TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), 530 StackPtr) 531 .addReg(StackPtr) 532 .addImm(Amount); 533 } else { 534 assert(Old->getOpcode() == getCallFrameDestroyOpcode()); 535 536 // Factor out the amount the callee already popped. 537 uint64_t CalleeAmt = Old->getOperand(1).getImm(); 538 Amount -= CalleeAmt; 539 540 if (Amount) { 541 unsigned Opc = (Amount < 128) ? 542 (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : 543 (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); 544 New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) 545 .addReg(StackPtr) 546 .addImm(Amount); 547 } 548 } 549 550 if (New) { 551 // The EFLAGS implicit def is dead. 552 New->getOperand(3).setIsDead(); 553 554 // Replace the pseudo instruction with a new instruction. 555 MBB.insert(I, New); 556 } 557 } 558 } else if (I->getOpcode() == getCallFrameDestroyOpcode()) { 559 // If we are performing frame pointer elimination and if the callee pops 560 // something off the stack pointer, add it back. We do this until we have 561 // more advanced stack pointer tracking ability. 562 if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { 563 unsigned Opc = (CalleeAmt < 128) ? 564 (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : 565 (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); 566 MachineInstr *Old = I; 567 MachineInstr *New = 568 BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), 569 StackPtr) 570 .addReg(StackPtr) 571 .addImm(CalleeAmt); 572 573 // The EFLAGS implicit def is dead. 574 New->getOperand(3).setIsDead(); 575 MBB.insert(I, New); 576 } 577 } 578 579 MBB.erase(I); 580} 581 582void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, 583 int SPAdj, RegScavenger *RS) const{ 584 assert(SPAdj == 0 && "Unexpected"); 585 586 unsigned i = 0; 587 MachineInstr &MI = *II; 588 MachineFunction &MF = *MI.getParent()->getParent(); 589 590 while (!MI.getOperand(i).isFI()) { 591 ++i; 592 assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); 593 } 594 595 int FrameIndex = MI.getOperand(i).getIndex(); 596 unsigned BasePtr; 597 598 if (needsStackRealignment(MF)) 599 BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); 600 else 601 BasePtr = (hasFP(MF) ? FramePtr : StackPtr); 602 603 // This must be part of a four operand memory reference. Replace the 604 // FrameIndex with base register with EBP. Add an offset to the offset. 605 MI.getOperand(i).ChangeToRegister(BasePtr, false); 606 607 // Now add the frame object offset to the offset from EBP. 608 if (MI.getOperand(i+3).isImm()) { 609 // Offset is a 32-bit integer. 610 int Offset = getFrameIndexOffset(MF, FrameIndex) + 611 (int)(MI.getOperand(i + 3).getImm()); 612 613 MI.getOperand(i + 3).ChangeToImmediate(Offset); 614 } else { 615 // Offset is symbolic. This is extremely rare. 616 uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + 617 (uint64_t)MI.getOperand(i+3).getOffset(); 618 MI.getOperand(i+3).setOffset(Offset); 619 } 620} 621 622void 623X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 624 RegScavenger *RS) const { 625 MachineFrameInfo *MFI = MF.getFrameInfo(); 626 627 // Calculate and set max stack object alignment early, so we can decide 628 // whether we will need stack realignment (and thus FP). 629 unsigned MaxAlign = std::max(MFI->getMaxAlignment(), 630 calculateMaxStackAlignment(MFI)); 631 632 MFI->setMaxAlignment(MaxAlign); 633 634 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 635 int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 636 637 if (TailCallReturnAddrDelta < 0) { 638 // create RETURNADDR area 639 // arg 640 // arg 641 // RETADDR 642 // { ... 643 // RETADDR area 644 // ... 645 // } 646 // [EBP] 647 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 648 (-1*SlotSize)+TailCallReturnAddrDelta); 649 } 650 651 if (hasFP(MF)) { 652 assert((TailCallReturnAddrDelta <= 0) && 653 "The Delta should always be zero or negative"); 654 const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); 655 656 // Create a frame entry for the EBP register that must be saved. 657 int FrameIdx = MFI->CreateFixedObject(SlotSize, 658 -(int)SlotSize + 659 TFI.getOffsetOfLocalArea() + 660 TailCallReturnAddrDelta); 661 assert(FrameIdx == MFI->getObjectIndexBegin() && 662 "Slot for EBP register must be last in order to be found!"); 663 FrameIdx = 0; 664 } 665} 666 667/// emitSPUpdate - Emit a series of instructions to increment / decrement the 668/// stack pointer by a constant value. 669static 670void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 671 unsigned StackPtr, int64_t NumBytes, bool Is64Bit, 672 const TargetInstrInfo &TII) { 673 bool isSub = NumBytes < 0; 674 uint64_t Offset = isSub ? -NumBytes : NumBytes; 675 unsigned Opc = isSub 676 ? ((Offset < 128) ? 677 (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : 678 (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri)) 679 : ((Offset < 128) ? 680 (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : 681 (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri)); 682 uint64_t Chunk = (1LL << 31) - 1; 683 DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : 684 DebugLoc::getUnknownLoc()); 685 686 while (Offset) { 687 uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; 688 MachineInstr *MI = 689 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 690 .addReg(StackPtr) 691 .addImm(ThisVal); 692 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 693 Offset -= ThisVal; 694 } 695} 696 697/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 698static 699void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 700 unsigned StackPtr, uint64_t *NumBytes = NULL) { 701 if (MBBI == MBB.begin()) return; 702 703 MachineBasicBlock::iterator PI = prior(MBBI); 704 unsigned Opc = PI->getOpcode(); 705 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 706 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 707 PI->getOperand(0).getReg() == StackPtr) { 708 if (NumBytes) 709 *NumBytes += PI->getOperand(2).getImm(); 710 MBB.erase(PI); 711 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 712 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 713 PI->getOperand(0).getReg() == StackPtr) { 714 if (NumBytes) 715 *NumBytes -= PI->getOperand(2).getImm(); 716 MBB.erase(PI); 717 } 718} 719 720/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. 721static 722void mergeSPUpdatesDown(MachineBasicBlock &MBB, 723 MachineBasicBlock::iterator &MBBI, 724 unsigned StackPtr, uint64_t *NumBytes = NULL) { 725 // FIXME: THIS ISN'T RUN!!! 726 return; 727 728 if (MBBI == MBB.end()) return; 729 730 MachineBasicBlock::iterator NI = next(MBBI); 731 if (NI == MBB.end()) return; 732 733 unsigned Opc = NI->getOpcode(); 734 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 735 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 736 NI->getOperand(0).getReg() == StackPtr) { 737 if (NumBytes) 738 *NumBytes -= NI->getOperand(2).getImm(); 739 MBB.erase(NI); 740 MBBI = NI; 741 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 742 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 743 NI->getOperand(0).getReg() == StackPtr) { 744 if (NumBytes) 745 *NumBytes += NI->getOperand(2).getImm(); 746 MBB.erase(NI); 747 MBBI = NI; 748 } 749} 750 751/// mergeSPUpdates - Checks the instruction before/after the passed 752/// instruction. If it is an ADD/SUB instruction it is deleted argument and the 753/// stack adjustment is returned as a positive value for ADD and a negative for 754/// SUB. 755static int mergeSPUpdates(MachineBasicBlock &MBB, 756 MachineBasicBlock::iterator &MBBI, 757 unsigned StackPtr, 758 bool doMergeWithPrevious) { 759 if ((doMergeWithPrevious && MBBI == MBB.begin()) || 760 (!doMergeWithPrevious && MBBI == MBB.end())) 761 return 0; 762 763 MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; 764 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI); 765 unsigned Opc = PI->getOpcode(); 766 int Offset = 0; 767 768 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 769 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 770 PI->getOperand(0).getReg() == StackPtr){ 771 Offset += PI->getOperand(2).getImm(); 772 MBB.erase(PI); 773 if (!doMergeWithPrevious) MBBI = NI; 774 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 775 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 776 PI->getOperand(0).getReg() == StackPtr) { 777 Offset -= PI->getOperand(2).getImm(); 778 MBB.erase(PI); 779 if (!doMergeWithPrevious) MBBI = NI; 780 } 781 782 return Offset; 783} 784 785void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF, 786 unsigned LabelId, 787 unsigned FramePtr) const { 788 MachineFrameInfo *MFI = MF.getFrameInfo(); 789 MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); 790 if (!MMI) return; 791 792 // Add callee saved registers to move list. 793 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 794 if (CSI.empty()) return; 795 796 std::vector<MachineMove> &Moves = MMI->getFrameMoves(); 797 const TargetData *TD = MF.getTarget().getTargetData(); 798 bool HasFP = hasFP(MF); 799 800 // Calculate amount of bytes used for return address storing. 801 int stackGrowth = 802 (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == 803 TargetFrameInfo::StackGrowsUp ? 804 TD->getPointerSize() : -TD->getPointerSize()); 805 806 // FIXME: This is dirty hack. The code itself is pretty mess right now. 807 // It should be rewritten from scratch and generalized sometimes. 808 809 // Determine maximum offset (minumum due to stack growth). 810 int64_t MaxOffset = 0; 811 for (std::vector<CalleeSavedInfo>::const_iterator 812 I = CSI.begin(), E = CSI.end(); I != E; ++I) 813 MaxOffset = std::min(MaxOffset, 814 MFI->getObjectOffset(I->getFrameIdx())); 815 816 // Calculate offsets. 817 int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; 818 for (std::vector<CalleeSavedInfo>::const_iterator 819 I = CSI.begin(), E = CSI.end(); I != E; ++I) { 820 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); 821 unsigned Reg = I->getReg(); 822 Offset = MaxOffset - Offset + saveAreaOffset; 823 824 // Don't output a new machine move if we're re-saving the frame 825 // pointer. This happens when the PrologEpilogInserter has inserted an extra 826 // "PUSH" of the frame pointer -- the "emitPrologue" method automatically 827 // generates one when frame pointers are used. If we generate a "machine 828 // move" for this extra "PUSH", the linker will lose track of the fact that 829 // the frame pointer should have the value of the first "PUSH" when it's 830 // trying to unwind. 831 // 832 // FIXME: This looks inelegant. It's possibly correct, but it's covering up 833 // another bug. I.e., one where we generate a prolog like this: 834 // 835 // pushl %ebp 836 // movl %esp, %ebp 837 // pushl %ebp 838 // pushl %esi 839 // ... 840 // 841 // The immediate re-push of EBP is unnecessary. At the least, it's an 842 // optimization bug. EBP can be used as a scratch register in certain 843 // cases, but probably not when we have a frame pointer. 844 if (HasFP && FramePtr == Reg) 845 continue; 846 847 MachineLocation CSDst(MachineLocation::VirtualFP, Offset); 848 MachineLocation CSSrc(Reg); 849 Moves.push_back(MachineMove(LabelId, CSDst, CSSrc)); 850 } 851} 852 853/// emitPrologue - Push callee-saved registers onto the stack, which 854/// automatically adjust the stack pointer. Adjust the stack pointer to allocate 855/// space for local variables. Also emit labels used by the exception handler to 856/// generate the exception handling frames. 857void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { 858 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 859 MachineBasicBlock::iterator MBBI = MBB.begin(); 860 MachineFrameInfo *MFI = MF.getFrameInfo(); 861 const Function *Fn = MF.getFunction(); 862 const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>(); 863 MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); 864 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 865 bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || 866 !Fn->doesNotThrow() || UnwindTablesMandatory; 867 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. 868 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 869 bool HasFP = hasFP(MF); 870 DebugLoc DL; 871 872 // Add RETADDR move area to callee saved frame size. 873 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 874 if (TailCallReturnAddrDelta < 0) 875 X86FI->setCalleeSavedFrameSize( 876 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); 877 878 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf 879 // function, and use up to 128 bytes of stack space, don't have a frame 880 // pointer, calls, or dynamic alloca then we do not need to adjust the 881 // stack pointer (we fit in the Red Zone). 882 if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && 883 !needsStackRealignment(MF) && 884 !MFI->hasVarSizedObjects() && // No dynamic alloca. 885 !MFI->hasCalls() && // No calls. 886 !Subtarget->isTargetWin64()) { // Win64 has no Red Zone 887 uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); 888 if (HasFP) MinSize += SlotSize; 889 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); 890 MFI->setStackSize(StackSize); 891 } else if (Subtarget->isTargetWin64()) { 892 // We need to always allocate 32 bytes as register spill area. 893 // FIXME: We might reuse these 32 bytes for leaf functions. 894 StackSize += 32; 895 MFI->setStackSize(StackSize); 896 } 897 898 // Insert stack pointer adjustment for later moving of return addr. Only 899 // applies to tail call optimized functions where the callee argument stack 900 // size is bigger than the callers. 901 if (TailCallReturnAddrDelta < 0) { 902 MachineInstr *MI = 903 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), 904 StackPtr) 905 .addReg(StackPtr) 906 .addImm(-TailCallReturnAddrDelta); 907 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 908 } 909 910 // Mapping for machine moves: 911 // 912 // DST: VirtualFP AND 913 // SRC: VirtualFP => DW_CFA_def_cfa_offset 914 // ELSE => DW_CFA_def_cfa 915 // 916 // SRC: VirtualFP AND 917 // DST: Register => DW_CFA_def_cfa_register 918 // 919 // ELSE 920 // OFFSET < 0 => DW_CFA_offset_extended_sf 921 // REG < 64 => DW_CFA_offset + Reg 922 // ELSE => DW_CFA_offset_extended 923 924 std::vector<MachineMove> &Moves = MMI->getFrameMoves(); 925 const TargetData *TD = MF.getTarget().getTargetData(); 926 uint64_t NumBytes = 0; 927 int stackGrowth = 928 (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == 929 TargetFrameInfo::StackGrowsUp ? 930 TD->getPointerSize() : -TD->getPointerSize()); 931 932 if (HasFP) { 933 // Calculate required stack adjustment. 934 uint64_t FrameSize = StackSize - SlotSize; 935 if (needsStackRealignment(MF)) 936 FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; 937 938 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); 939 940 // Get the offset of the stack slot for the EBP register, which is 941 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. 942 // Update the frame offset adjustment. 943 MFI->setOffsetAdjustment(-NumBytes); 944 945 // Save EBP/RBP into the appropriate stack slot. 946 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 947 .addReg(FramePtr, RegState::Kill); 948 949 if (needsFrameMoves) { 950 // Mark the place where EBP/RBP was saved. 951 unsigned FrameLabelId = MMI->NextLabelID(); 952 BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); 953 954 // Define the current CFA rule to use the provided offset. 955 if (StackSize) { 956 MachineLocation SPDst(MachineLocation::VirtualFP); 957 MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); 958 Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); 959 } else { 960 // FIXME: Verify & implement for FP 961 MachineLocation SPDst(StackPtr); 962 MachineLocation SPSrc(StackPtr, stackGrowth); 963 Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); 964 } 965 966 // Change the rule for the FramePtr to be an "offset" rule. 967 MachineLocation FPDst(MachineLocation::VirtualFP, 968 2 * stackGrowth); 969 MachineLocation FPSrc(FramePtr); 970 Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); 971 } 972 973 // Update EBP with the new base value... 974 BuildMI(MBB, MBBI, DL, 975 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) 976 .addReg(StackPtr); 977 978 if (needsFrameMoves) { 979 // Mark effective beginning of when frame pointer becomes valid. 980 unsigned FrameLabelId = MMI->NextLabelID(); 981 BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); 982 983 // Define the current CFA to use the EBP/RBP register. 984 MachineLocation FPDst(FramePtr); 985 MachineLocation FPSrc(MachineLocation::VirtualFP); 986 Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); 987 } 988 989 // Mark the FramePtr as live-in in every block except the entry. 990 for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); 991 I != E; ++I) 992 I->addLiveIn(FramePtr); 993 994 // Realign stack 995 if (needsStackRealignment(MF)) { 996 MachineInstr *MI = 997 BuildMI(MBB, MBBI, DL, 998 TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), 999 StackPtr).addReg(StackPtr).addImm(-MaxAlign); 1000 1001 // The EFLAGS implicit def is dead. 1002 MI->getOperand(3).setIsDead(); 1003 } 1004 } else { 1005 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); 1006 } 1007 1008 // Skip the callee-saved push instructions. 1009 bool PushedRegs = false; 1010 int StackOffset = 2 * stackGrowth; 1011 1012 while (MBBI != MBB.end() && 1013 (MBBI->getOpcode() == X86::PUSH32r || 1014 MBBI->getOpcode() == X86::PUSH64r)) { 1015 PushedRegs = true; 1016 ++MBBI; 1017 1018 if (!HasFP && needsFrameMoves) { 1019 // Mark callee-saved push instruction. 1020 unsigned LabelId = MMI->NextLabelID(); 1021 BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId); 1022 1023 // Define the current CFA rule to use the provided offset. 1024 unsigned Ptr = StackSize ? 1025 MachineLocation::VirtualFP : StackPtr; 1026 MachineLocation SPDst(Ptr); 1027 MachineLocation SPSrc(Ptr, StackOffset); 1028 Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); 1029 StackOffset += stackGrowth; 1030 } 1031 } 1032 1033 if (MBBI != MBB.end()) 1034 DL = MBBI->getDebugLoc(); 1035 1036 // Adjust stack pointer: ESP -= numbytes. 1037 if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { 1038 // Check, whether EAX is livein for this function. 1039 bool isEAXAlive = false; 1040 for (MachineRegisterInfo::livein_iterator 1041 II = MF.getRegInfo().livein_begin(), 1042 EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { 1043 unsigned Reg = II->first; 1044 isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || 1045 Reg == X86::AH || Reg == X86::AL); 1046 } 1047 1048 // Function prologue calls _alloca to probe the stack when allocating more 1049 // than 4k bytes in one go. Touching the stack at 4K increments is necessary 1050 // to ensure that the guard pages used by the OS virtual memory manager are 1051 // allocated in correct sequence. 1052 if (!isEAXAlive) { 1053 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 1054 .addImm(NumBytes); 1055 BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) 1056 .addExternalSymbol("_alloca"); 1057 } else { 1058 // Save EAX 1059 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) 1060 .addReg(X86::EAX, RegState::Kill); 1061 1062 // Allocate NumBytes-4 bytes on stack. We'll also use 4 already 1063 // allocated bytes for EAX. 1064 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 1065 .addImm(NumBytes - 4); 1066 BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) 1067 .addExternalSymbol("_alloca"); 1068 1069 // Restore EAX 1070 MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), 1071 X86::EAX), 1072 StackPtr, false, NumBytes - 4); 1073 MBB.insert(MBBI, MI); 1074 } 1075 } else if (NumBytes) { 1076 // If there is an SUB32ri of ESP immediately before this instruction, merge 1077 // the two. This can be the case when tail call elimination is enabled and 1078 // the callee has more arguments then the caller. 1079 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); 1080 1081 // If there is an ADD32ri or SUB32ri of ESP immediately after this 1082 // instruction, merge the two instructions. 1083 mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); 1084 1085 if (NumBytes) 1086 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); 1087 } 1088 1089 if (NumBytes && needsFrameMoves) { 1090 // Mark end of stack pointer adjustment. 1091 unsigned LabelId = MMI->NextLabelID(); 1092 BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId); 1093 1094 if (!HasFP) { 1095 // Define the current CFA rule to use the provided offset. 1096 if (StackSize) { 1097 MachineLocation SPDst(MachineLocation::VirtualFP); 1098 MachineLocation SPSrc(MachineLocation::VirtualFP, 1099 -StackSize + stackGrowth); 1100 Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); 1101 } else { 1102 // FIXME: Verify & implement for FP 1103 MachineLocation SPDst(StackPtr); 1104 MachineLocation SPSrc(StackPtr, stackGrowth); 1105 Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); 1106 } 1107 } 1108 1109 // Emit DWARF info specifying the offsets of the callee-saved registers. 1110 if (PushedRegs) 1111 emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr); 1112 } 1113} 1114 1115void X86RegisterInfo::emitEpilogue(MachineFunction &MF, 1116 MachineBasicBlock &MBB) const { 1117 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1118 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1119 MachineBasicBlock::iterator MBBI = prior(MBB.end()); 1120 unsigned RetOpcode = MBBI->getOpcode(); 1121 DebugLoc DL = MBBI->getDebugLoc(); 1122 1123 switch (RetOpcode) { 1124 default: 1125 llvm_unreachable("Can only insert epilog into returning blocks"); 1126 case X86::RET: 1127 case X86::RETI: 1128 case X86::TCRETURNdi: 1129 case X86::TCRETURNri: 1130 case X86::TCRETURNri64: 1131 case X86::TCRETURNdi64: 1132 case X86::EH_RETURN: 1133 case X86::EH_RETURN64: 1134 case X86::TAILJMPd: 1135 case X86::TAILJMPr: 1136 case X86::TAILJMPm: 1137 break; // These are ok 1138 } 1139 1140 // Get the number of bytes to allocate from the FrameInfo. 1141 uint64_t StackSize = MFI->getStackSize(); 1142 uint64_t MaxAlign = MFI->getMaxAlignment(); 1143 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 1144 uint64_t NumBytes = 0; 1145 1146 if (hasFP(MF)) { 1147 // Calculate required stack adjustment. 1148 uint64_t FrameSize = StackSize - SlotSize; 1149 if (needsStackRealignment(MF)) 1150 FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; 1151 1152 NumBytes = FrameSize - CSSize; 1153 1154 // Pop EBP. 1155 BuildMI(MBB, MBBI, DL, 1156 TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); 1157 } else { 1158 NumBytes = StackSize - CSSize; 1159 } 1160 1161 // Skip the callee-saved pop instructions. 1162 MachineBasicBlock::iterator LastCSPop = MBBI; 1163 while (MBBI != MBB.begin()) { 1164 MachineBasicBlock::iterator PI = prior(MBBI); 1165 unsigned Opc = PI->getOpcode(); 1166 1167 if (Opc != X86::POP32r && Opc != X86::POP64r && 1168 !PI->getDesc().isTerminator()) 1169 break; 1170 1171 --MBBI; 1172 } 1173 1174 DL = MBBI->getDebugLoc(); 1175 1176 // If there is an ADD32ri or SUB32ri of ESP immediately before this 1177 // instruction, merge the two instructions. 1178 if (NumBytes || MFI->hasVarSizedObjects()) 1179 mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); 1180 1181 // If dynamic alloca is used, then reset esp to point to the last callee-saved 1182 // slot before popping them off! Same applies for the case, when stack was 1183 // realigned. 1184 if (needsStackRealignment(MF)) { 1185 // We cannot use LEA here, because stack pointer was realigned. We need to 1186 // deallocate local frame back. 1187 if (CSSize) { 1188 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); 1189 MBBI = prior(LastCSPop); 1190 } 1191 1192 BuildMI(MBB, MBBI, DL, 1193 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), 1194 StackPtr).addReg(FramePtr); 1195 } else if (MFI->hasVarSizedObjects()) { 1196 if (CSSize) { 1197 unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; 1198 MachineInstr *MI = 1199 addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), 1200 FramePtr, false, -CSSize); 1201 MBB.insert(MBBI, MI); 1202 } else { 1203 BuildMI(MBB, MBBI, DL, 1204 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) 1205 .addReg(FramePtr); 1206 } 1207 } else if (NumBytes) { 1208 // Adjust stack pointer back: ESP += numbytes. 1209 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); 1210 } 1211 1212 // We're returning from function via eh_return. 1213 if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { 1214 MBBI = prior(MBB.end()); 1215 MachineOperand &DestAddr = MBBI->getOperand(0); 1216 assert(DestAddr.isReg() && "Offset should be in register!"); 1217 BuildMI(MBB, MBBI, DL, 1218 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), 1219 StackPtr).addReg(DestAddr.getReg()); 1220 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || 1221 RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) { 1222 // Tail call return: adjust the stack pointer and jump to callee. 1223 MBBI = prior(MBB.end()); 1224 MachineOperand &JumpTarget = MBBI->getOperand(0); 1225 MachineOperand &StackAdjust = MBBI->getOperand(1); 1226 assert(StackAdjust.isImm() && "Expecting immediate value."); 1227 1228 // Adjust stack pointer. 1229 int StackAdj = StackAdjust.getImm(); 1230 int MaxTCDelta = X86FI->getTCReturnAddrDelta(); 1231 int Offset = 0; 1232 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); 1233 1234 // Incoporate the retaddr area. 1235 Offset = StackAdj-MaxTCDelta; 1236 assert(Offset >= 0 && "Offset should never be negative"); 1237 1238 if (Offset) { 1239 // Check for possible merge with preceeding ADD instruction. 1240 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); 1241 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII); 1242 } 1243 1244 // Jump to label or value in register. 1245 if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) 1246 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)). 1247 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1248 else if (RetOpcode== X86::TCRETURNri64) 1249 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); 1250 else 1251 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); 1252 1253 // Delete the pseudo instruction TCRETURN. 1254 MBB.erase(MBBI); 1255 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && 1256 (X86FI->getTCReturnAddrDelta() < 0)) { 1257 // Add the return addr area delta back since we are not tail calling. 1258 int delta = -1*X86FI->getTCReturnAddrDelta(); 1259 MBBI = prior(MBB.end()); 1260 1261 // Check for possible merge with preceeding ADD instruction. 1262 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); 1263 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); 1264 } 1265} 1266 1267unsigned X86RegisterInfo::getRARegister() const { 1268 return Is64Bit ? X86::RIP // Should have dwarf #16. 1269 : X86::EIP; // Should have dwarf #8. 1270} 1271 1272unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { 1273 return hasFP(MF) ? FramePtr : StackPtr; 1274} 1275 1276void 1277X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { 1278 // Calculate amount of bytes used for return address storing 1279 int stackGrowth = (Is64Bit ? -8 : -4); 1280 1281 // Initial state of the frame pointer is esp+4. 1282 MachineLocation Dst(MachineLocation::VirtualFP); 1283 MachineLocation Src(StackPtr, stackGrowth); 1284 Moves.push_back(MachineMove(0, Dst, Src)); 1285 1286 // Add return address to move list 1287 MachineLocation CSDst(StackPtr, stackGrowth); 1288 MachineLocation CSSrc(getRARegister()); 1289 Moves.push_back(MachineMove(0, CSDst, CSSrc)); 1290} 1291 1292unsigned X86RegisterInfo::getEHExceptionRegister() const { 1293 llvm_unreachable("What is the exception register"); 1294 return 0; 1295} 1296 1297unsigned X86RegisterInfo::getEHHandlerRegister() const { 1298 llvm_unreachable("What is the exception handler register"); 1299 return 0; 1300} 1301 1302namespace llvm { 1303unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { 1304 switch (VT.getSimpleVT().SimpleTy) { 1305 default: return Reg; 1306 case MVT::i8: 1307 if (High) { 1308 switch (Reg) { 1309 default: return 0; 1310 case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: 1311 return X86::AH; 1312 case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: 1313 return X86::DH; 1314 case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: 1315 return X86::CH; 1316 case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: 1317 return X86::BH; 1318 } 1319 } else { 1320 switch (Reg) { 1321 default: return 0; 1322 case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: 1323 return X86::AL; 1324 case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: 1325 return X86::DL; 1326 case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: 1327 return X86::CL; 1328 case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: 1329 return X86::BL; 1330 case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: 1331 return X86::SIL; 1332 case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: 1333 return X86::DIL; 1334 case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: 1335 return X86::BPL; 1336 case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: 1337 return X86::SPL; 1338 case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: 1339 return X86::R8B; 1340 case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: 1341 return X86::R9B; 1342 case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: 1343 return X86::R10B; 1344 case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: 1345 return X86::R11B; 1346 case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: 1347 return X86::R12B; 1348 case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: 1349 return X86::R13B; 1350 case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: 1351 return X86::R14B; 1352 case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: 1353 return X86::R15B; 1354 } 1355 } 1356 case MVT::i16: 1357 switch (Reg) { 1358 default: return Reg; 1359 case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: 1360 return X86::AX; 1361 case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: 1362 return X86::DX; 1363 case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: 1364 return X86::CX; 1365 case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: 1366 return X86::BX; 1367 case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: 1368 return X86::SI; 1369 case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: 1370 return X86::DI; 1371 case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: 1372 return X86::BP; 1373 case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: 1374 return X86::SP; 1375 case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: 1376 return X86::R8W; 1377 case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: 1378 return X86::R9W; 1379 case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: 1380 return X86::R10W; 1381 case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: 1382 return X86::R11W; 1383 case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: 1384 return X86::R12W; 1385 case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: 1386 return X86::R13W; 1387 case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: 1388 return X86::R14W; 1389 case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: 1390 return X86::R15W; 1391 } 1392 case MVT::i32: 1393 switch (Reg) { 1394 default: return Reg; 1395 case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: 1396 return X86::EAX; 1397 case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: 1398 return X86::EDX; 1399 case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: 1400 return X86::ECX; 1401 case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: 1402 return X86::EBX; 1403 case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: 1404 return X86::ESI; 1405 case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: 1406 return X86::EDI; 1407 case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: 1408 return X86::EBP; 1409 case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: 1410 return X86::ESP; 1411 case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: 1412 return X86::R8D; 1413 case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: 1414 return X86::R9D; 1415 case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: 1416 return X86::R10D; 1417 case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: 1418 return X86::R11D; 1419 case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: 1420 return X86::R12D; 1421 case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: 1422 return X86::R13D; 1423 case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: 1424 return X86::R14D; 1425 case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: 1426 return X86::R15D; 1427 } 1428 case MVT::i64: 1429 switch (Reg) { 1430 default: return Reg; 1431 case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: 1432 return X86::RAX; 1433 case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: 1434 return X86::RDX; 1435 case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: 1436 return X86::RCX; 1437 case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: 1438 return X86::RBX; 1439 case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: 1440 return X86::RSI; 1441 case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: 1442 return X86::RDI; 1443 case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: 1444 return X86::RBP; 1445 case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: 1446 return X86::RSP; 1447 case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: 1448 return X86::R8; 1449 case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: 1450 return X86::R9; 1451 case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: 1452 return X86::R10; 1453 case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: 1454 return X86::R11; 1455 case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: 1456 return X86::R12; 1457 case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: 1458 return X86::R13; 1459 case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: 1460 return X86::R14; 1461 case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: 1462 return X86::R15; 1463 } 1464 } 1465 1466 return Reg; 1467} 1468} 1469 1470#include "X86GenRegisterInfo.inc" 1471 1472namespace { 1473 struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass { 1474 static char ID; 1475 MSAC() : MachineFunctionPass(&ID) {} 1476 1477 virtual bool runOnMachineFunction(MachineFunction &MF) { 1478 MachineFrameInfo *FFI = MF.getFrameInfo(); 1479 MachineRegisterInfo &RI = MF.getRegInfo(); 1480 1481 // Calculate max stack alignment of all already allocated stack objects. 1482 unsigned MaxAlign = calculateMaxStackAlignment(FFI); 1483 1484 // Be over-conservative: scan over all vreg defs and find, whether vector 1485 // registers are used. If yes - there is probability, that vector register 1486 // will be spilled and thus stack needs to be aligned properly. 1487 for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; 1488 RegNum < RI.getLastVirtReg(); ++RegNum) 1489 MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); 1490 1491 if (FFI->getMaxAlignment() == MaxAlign) 1492 return false; 1493 1494 FFI->setMaxAlignment(MaxAlign); 1495 return true; 1496 } 1497 1498 virtual const char *getPassName() const { 1499 return "X86 Maximal Stack Alignment Calculator"; 1500 } 1501 1502 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 1503 AU.setPreservesCFG(); 1504 MachineFunctionPass::getAnalysisUsage(AU); 1505 } 1506 }; 1507 1508 char MSAC::ID = 0; 1509} 1510 1511FunctionPass* 1512llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); } 1513