X86RegisterInfo.cpp revision 825b72b0571821bf2d378749f69d6c4cfb52d2f9
//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                 const TargetInstrInfo &tii)
  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKDOWN64 :
                         X86::ADJCALLSTACKDOWN32,
                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKUP64 :
                         X86::ADJCALLSTACKUP32),
    TM(tm), TII(tii) {
  // Cache some information.
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  Is64Bit = Subtarget->is64Bit();
  IsWin64 = Subtarget->isTargetWin64();
  StackAlign = TM.getFrameInfo()->getStackAlignment();
  if (Is64Bit) {
    SlotSize = 8;
    StackPtr = X86::RSP;
    FramePtr = X86::RBP;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
  }
}

// getDwarfRegNum - This function maps LLVM register identifiers to the
// DWARF-specific numbering, used in debug info and exception tables.

int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  unsigned Flavour = DWARFFlavour::X86_64;
  if (!Subtarget->is64Bit()) {
    if (Subtarget->isTargetDarwin()) {
      if (isEH)
        Flavour = DWARFFlavour::X86_32_DarwinEH;
      else
        Flavour = DWARFFlavour::X86_32_Generic;
    } else if (Subtarget->isTargetCygMing()) {
      // Not supported for now; just a quick fallback.
      Flavour = DWARFFlavour::X86_32_Generic;
    } else {
      Flavour = DWARFFlavour::X86_32_Generic;
    }
  }

  return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}

// getX86RegNum - This function maps LLVM register identifiers to their X86
// specific numbering, which is used in various places when encoding
// instructions.
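// For example, getX86RegNum(X86::R9D) yields N86::ECX (1): R9 shares the low
// three encoding bits (001) with RCX/ECX, and the fourth bit is supplied
// separately by a REX prefix bit when the instruction is emitted.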
//
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
  switch(RegNo) {
  case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
  case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
  case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
  case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
  case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
    return N86::ESP;
  case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
    return N86::EBP;
  case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
    return N86::ESI;
  case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
    return N86::EDI;

  case X86::R8:  case X86::R8D:  case X86::R8W:  case X86::R8B:
    return N86::EAX;
  case X86::R9:  case X86::R9D:  case X86::R9W:  case X86::R9B:
    return N86::ECX;
  case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
    return N86::EDX;
  case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
    return N86::EBX;
  case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
    return N86::ESP;
  case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
    return N86::EBP;
  case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
    return N86::ESI;
  case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
    return N86::EDI;

  case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
  case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
    return RegNo-X86::ST0;

  case X86::XMM0: case X86::XMM8: case X86::MM0:
    return 0;
  case X86::XMM1: case X86::XMM9: case X86::MM1:
    return 1;
  case X86::XMM2: case X86::XMM10: case X86::MM2:
    return 2;
  case X86::XMM3: case X86::XMM11: case X86::MM3:
    return 3;
  case X86::XMM4: case X86::XMM12: case X86::MM4:
    return 4;
  case X86::XMM5: case X86::XMM13: case X86::MM5:
    return 5;
  case X86::XMM6: case X86::XMM14: case X86::MM6:
    return 6;
  case X86::XMM7: case X86::XMM15: case X86::MM7:
    return 7;

  default:
    assert(isVirtualRegister(RegNo) && "Unknown physical register!");
    llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
    return 0;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  switch (SubIdx) {
  default: return 0;
  case 1:
    // 8-bit
    if (B == &X86::GR8RegClass) {
      if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
        return A;
    } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
    } else if (B == &X86::GR8_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_NOREXRegClass;
      else if (A == &X86::GR32_ABCDRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_NOREXRegClass;
      else if (A == &X86::GR16_ABCDRegClass)
        return &X86::GR16_ABCDRegClass;
    }
    break;
  case 2:
    // 8-bit hi
    if (B == &X86::GR8_ABCD_HRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
    }
    break;
  case 3:
    // 16-bit
    if (B == &X86::GR16RegClass) {
      if (A->getSize() == 4 || A->getSize() == 8)
        return A;
    } else if (B == &X86::GR16_ABCDRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
    } else if (B == &X86::GR16_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_NOREXRegClass;
      else if (A == &X86::GR32_ABCDRegClass)
        return &X86::GR32_ABCDRegClass;
    }
    break;
  case 4:
    // 32-bit
    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
      if (A->getSize() == 8)
        return A;
    } else if (B == &X86::GR32_ABCDRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
    } else if (B == &X86::GR32_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
    }
    break;
  }
  return 0;
}

const TargetRegisterClass *X86RegisterInfo::
getPointerRegClass(unsigned Kind) const {
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return &X86::GR64RegClass;
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
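    // (The SIB byte has no encoding for ESP/RSP as an index register --
    //  index value 0b100 means "no index" -- which is presumably why address
    //  computations request this SP-free class.)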
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return &X86::GR64_NOSPRegClass;
    return &X86::GR32_NOSPRegClass;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return NULL;
}

const unsigned *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  bool callsEHReturn = false;

  if (MF) {
    const MachineFrameInfo *MFI = MF->getFrameInfo();
    const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
    callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
  }

  static const unsigned CalleeSavedRegs32Bit[] = {
    X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
  };

  static const unsigned CalleeSavedRegs32EHRet[] = {
    X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
  };

  static const unsigned CalleeSavedRegs64Bit[] = {
    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  static const unsigned CalleeSavedRegs64EHRet[] = {
    X86::RAX, X86::RDX, X86::RBX, X86::R12,
    X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  static const unsigned CalleeSavedRegsWin64[] = {
    X86::RBX,   X86::RBP,   X86::RDI,   X86::RSI,
    X86::R12,   X86::R13,   X86::R14,   X86::R15,
    X86::XMM6,  X86::XMM7,  X86::XMM8,  X86::XMM9,
    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
    X86::XMM14, X86::XMM15, 0
  };

  if (Is64Bit) {
    if (IsWin64)
      return CalleeSavedRegsWin64;
    else
      return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
  } else {
    return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
  }
}

const TargetRegisterClass* const*
X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
  bool callsEHReturn = false;

  if (MF) {
    const MachineFrameInfo *MFI = MF->getFrameInfo();
    const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
    callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
  }

  static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass, 0
  };
  static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass, 0
  };
  static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass, 0
  };
  static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass, 0
  };
  static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass, 0
  };

  if (Is64Bit) {
    if (IsWin64)
      return CalleeSavedRegClassesWin64;
    else
      return (callsEHReturn ?
              CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
  } else {
    return (callsEHReturn ?
            CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
  }
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // Set the stack-pointer register and its aliases as reserved.
  Reserved.set(X86::RSP);
  Reserved.set(X86::ESP);
  Reserved.set(X86::SP);
  Reserved.set(X86::SPL);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (hasFP(MF)) {
    Reserved.set(X86::RBP);
    Reserved.set(X86::EBP);
    Reserved.set(X86::BP);
    Reserved.set(X86::BPL);
  }

  // Mark the x87 stack registers as reserved, since they don't behave
  // normally with respect to liveness. We don't fully model the effects
  // of x87 stack pushes and pops after stackification.
  Reserved.set(X86::ST0);
  Reserved.set(X86::ST1);
  Reserved.set(X86::ST2);
  Reserved.set(X86::ST3);
  Reserved.set(X86::ST4);
  Reserved.set(X86::ST5);
  Reserved.set(X86::ST6);
  Reserved.set(X86::ST7);

  return Reserved;
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
  unsigned MaxAlign = 0;

  for (int i = FFI->getObjectIndexBegin(),
         e = FFI->getObjectIndexEnd(); i != e; ++i) {
    if (FFI->isDeadObjectIndex(i))
      continue;

    unsigned Align = FFI->getObjectAlignment(i);
    MaxAlign = std::max(MaxAlign, Align);
  }

  return MaxAlign;
}

// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable-sized allocas or
// if frame pointer elimination is disabled.
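// For example, a function that calls alloca must keep EBP/RBP as a fixed base:
// once ESP/RSP has moved by a dynamic amount, fixed stack objects can no
// longer be addressed relative to the stack pointer.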
//
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();

  return (NoFramePointerElim ||
          needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          (MMI && MMI->callsUnwindInit()));
}

bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // FIXME: Currently we don't support stack realignment for functions with
  //        variable-sized allocas.
  return (RealignStack &&
          (MFI->getMaxAlignment() > StackAlign &&
           !MFI->hasVarSizedObjects()));
}

bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
                                           int &FrameIdx) const {
  if (Reg == FramePtr && hasFP(MF)) {
    FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
    return true;
  }
  return false;
}

int
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (needsStackRealignment(MF)) {
    if (FI < 0)
      // Skip the saved EBP.
      Offset += SlotSize;
    else {
      unsigned Align = MFI->getObjectAlignment(FI);
      assert( (-(Offset + StackSize)) % Align == 0);
      Align = 0;  // (Align is only used by the assert above.)
      return Offset + StackSize;
    }

    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}

void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after the prologue, turn the
    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
    // adjcallstackup instruction into an 'add ESP, <amt>'.
    // TODO: consider using push / pop instead of sub + store / add.
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
                      StackPtr).addReg(StackPtr).addImm(Amount);
      } else {
        assert(Old->getOpcode() == getCallFrameDestroyOpcode());

        // Factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;
        if (Amount) {
          unsigned Opc = (Amount < 128) ?
            (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
            (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
          New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
            .addReg(StackPtr).addImm(Amount);
        }
      }

      if (New) {
        // The EFLAGS implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction.
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back. We do this until we have
    // more advanced stack pointer tracking ability.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      unsigned Opc = (CalleeAmt < 128) ?
        (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
        (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
                StackPtr).addReg(StackPtr).addImm(CalleeAmt);

      // The EFLAGS implicit def is dead.
      New->getOperand(3).setIsDead();

      MBB.insert(I, New);
    }
  }

  MBB.erase(I);
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, RegScavenger *RS) const{
  assert(SPAdj == 0 && "Unexpected");

  unsigned i = 0;
  MachineInstr &MI = *II;
  MachineFunction &MF = *MI.getParent()->getParent();
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
  }

  int FrameIndex = MI.getOperand(i).getIndex();

  unsigned BasePtr;
  if (needsStackRealignment(MF))
    BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
  else
    BasePtr = (hasFP(MF) ? FramePtr : StackPtr);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with the base register (EBP or ESP).
  MI.getOperand(i).ChangeToRegister(BasePtr, false);

  // Now add the frame object offset to the offset from EBP.
  if (MI.getOperand(i+3).isImm()) {
    // Offset is a 32-bit integer.
    int Offset = getFrameIndexOffset(MF, FrameIndex) +
      (int)(MI.getOperand(i+3).getImm());

    MI.getOperand(i+3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
      (uint64_t)MI.getOperand(i+3).getOffset();
    MI.getOperand(i+3).setOffset(Offset);
  }
}

void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                      RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Calculate and set the max stack object alignment early, so we can decide
  // whether we will need stack realignment (and thus FP).
  unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
                               calculateMaxStackAlignment(MFI));

  MFI->setMaxAlignment(MaxAlign);

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0) {
    // Create the RETURNADDR area:
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           (-1*SlotSize)+TailCallReturnAddrDelta);
  }

  if (hasFP(MF)) {
    assert((TailCallReturnAddrDelta <= 0) &&
           "The Delta should always be zero or negative");
    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();

    // Create a frame entry for the EBP register that must be saved.
    int FrameIdx = MFI->CreateFixedObject(SlotSize,
                                          -(int)SlotSize +
                                          TFI.getOffsetOfLocalArea() +
                                          TailCallReturnAddrDelta);
    assert(FrameIdx == MFI->getObjectIndexBegin() &&
           "Slot for EBP register must be last in order to be found!");
    FrameIdx = 0;
  }
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
                  const TargetInstrInfo &TII) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc = isSub
    ? ((Offset < 128) ?
       (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
       (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
    : ((Offset < 128) ?
       (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
       (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
                 DebugLoc::getUnknownLoc());

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr).addImm(ThisVal);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();

    Offset -= ThisVal;
  }
}

// mergeSPUpdatesUp - If the instruction immediately before MBBI adjusts the
// stack pointer, fold its amount into NumBytes and erase it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

// mergeSPUpdatesDown - If the instruction immediately after MBBI adjusts the
// stack pointer, fold its amount into NumBytes and erase it.
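// NOTE: the unconditional "return" at the top of the body below disables this
// merge entirely; everything after it is currently dead code.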
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB instruction, it is deleted and the
/// stack adjustment is returned as a positive value for ADD and a
/// negative one for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  int Offset = 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr){
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                unsigned LabelId,
                                                unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
  if (!MMI) return;

  // Add callee saved registers to the move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI->getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate the number of bytes used for return address storage.
  int stackGrowth =
    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
     TargetFrameInfo::StackGrowsUp ?
     TD->getPointerSize() : -TD->getPointerSize());

  // FIXME: This is a dirty hack. The code itself is a mess right now.
  // It should be rewritten from scratch and generalized sometime.

  // Determine the maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    // another bug. I.e., one where we generate a prolog like this:
    //
    //     pushl  %ebp
    //     movl   %esp, %ebp
    //     pushl  %ebp
    //     pushl  %esi
    //     ...
    //
    // The immediate re-push of EBP is unnecessary. At the least, it's an
    // optimization bug. EBP can be used as a scratch register in certain
    // cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
  }
}

void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function* Fn = MF.getFunction();
  const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
                          !Fn->doesNotThrow() ||
                          UnwindTablesMandatory;
  bool HasFP = hasFP(MF);
  DebugLoc DL;

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();

  // Get the desired stack alignment.
  uint64_t MaxAlign = MFI->getMaxAlignment();

  // Add the RETADDR move area to the callee-saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() + (-TailCallReturnAddrDelta));

  // If this is x86-64 and the Red Zone is not disabled, and if we are a leaf
  // function that uses up to 128 bytes of stack space and has no frame
  // pointer, calls, or dynamic allocas, then we do not need to adjust the
  // stack pointer (we fit in the Red Zone).
  bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
  if (Is64Bit && !DisableRedZone &&
      !needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->hasCalls() &&                          // No calls.
      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone.
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize,
                         StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  } else if (Subtarget->isTargetWin64()) {
    // We need to always allocate 32 bytes as the register spill area.
    // FIXME: We might reuse these 32 bytes for leaf functions.
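    // (These 32 bytes are the Win64 ABI "shadow space": the area a caller
    //  must reserve so its callees can spill the four register parameters.)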
    StackSize += 32;
    MFI->setStackSize(StackSize);
  }

  // Insert a stack pointer adjustment for later moving of the return addr.
  // Only applies to tail call optimized functions where the callee argument
  // stack size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
              StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  std::vector<MachineMove> &Moves = MMI->getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  int stackGrowth =
    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
     TargetFrameInfo::StackGrowsUp ?
     TD->getPointerSize() : -TD->getPointerSize());

  uint64_t NumBytes = 0;
  if (HasFP) {
    // Calculate the required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill);

    if (needsFrameMoves) {
      // Mark the effective beginning of when the frame pointer becomes valid.
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              HasFP ? 2 * stackGrowth :
                                      -StackSize + stackGrowth);
        Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
      } else {
        // FIXME: Verify & implement for FP
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
    }

    // Update EBP with the new base value.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
      .addReg(StackPtr);

    if (needsFrameMoves) {
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign the stack.
    if (needsStackRealignment(MF)) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ?
                        X86::AND64ri32 : X86::AND32ri),
                StackPtr).addReg(StackPtr).addImm(-MaxAlign);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip over the callee-saved push instructions.
  bool RegsSaved = false;
  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    RegsSaved = true;
    ++MBBI;
  }

  if (RegsSaved && needsFrameMoves) {
    // Mark the end of the callee-saved push instructions.
    unsigned LabelId = MMI->NextLabelID();
    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr);
  }

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Adjust the stack pointer: ESP -= NumBytes.
  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
    // Check whether EAX is live-in for this function.
    bool isEAXAlive = false;
    for (MachineRegisterInfo::livein_iterator
           II = MF.getRegInfo().livein_begin(),
           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
      unsigned Reg = II->first;
      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
                    Reg == X86::AH || Reg == X86::AL);
    }

    // The function prologue calls _alloca to probe the stack when allocating
    // more than 4KB in one go. Touching the stack at 4K increments is
    // necessary to ensure that the guard pages used by the OS virtual memory
    // manager are allocated in the correct sequence.
    if (!isEAXAlive) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca");
    } else {
      // Save EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill);

      // Allocate NumBytes-4 bytes on the stack. We'll also use the 4 bytes
      // already allocated for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes - 4);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca");

      // Restore EAX.
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    // If there is a SUB32ri of ESP immediately before this instruction, merge
    // the two. This can be the case when tail call elimination is enabled and
    // the callee has more arguments than the caller.
    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

    // If there is an ADD32ri or SUB32ri of ESP immediately after this
    // instruction, merge the two instructions.
    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

    if (NumBytes)
      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
  }

  if (!HasFP && needsFrameMoves) {
    // Mark the end of the stack pointer adjustment.
    unsigned LabelId = MMI->NextLabelID();
    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);

    // Define the current CFA rule to use the provided offset.
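    // (The CFA -- canonical frame address -- is the stack-pointer value at
    //  the call site in the caller; DWARF unwind info describes every saved
    //  register's slot as an offset from it.)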
    if (StackSize) {
      MachineLocation SPDst(MachineLocation::VirtualFP);
      MachineLocation SPSrc(MachineLocation::VirtualFP,
                            -StackSize + stackGrowth);
      Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
    } else {
      // FIXME: Verify & implement for FP
      MachineLocation SPDst(StackPtr);
      MachineLocation SPSrc(StackPtr, stackGrowth);
      Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
    }
  }
}

void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNri64:
  case X86::TCRETURNdi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
  case X86::TAILJMPd:
  case X86::TAILJMPr:
  case X86::TAILJMPm: break;  // These are ok.
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate the required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != X86::POP32r && Opc != X86::POP64r &&
        !PI->getDesc().isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset ESP to point to the last
  // callee-saved slot before popping them off! The same applies when the
  // stack was realigned.
  if (needsStackRealignment(MF)) {
    // We cannot use LEA here, because the stack pointer was realigned. We
    // need to deallocate the local frame first.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
      MBBI = prior(LastCSPop);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                                         FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else
      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ?
                                     X86::MOV64rr : X86::MOV32rr),
              StackPtr).addReg(FramePtr);
  } else {
    // Adjust the stack pointer back: ESP += NumBytes.
    if (NumBytes)
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
  }

  // We're returning from the function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = prior(MBB.end());
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
    // Tail call return: adjust the stack pointer and jump to the callee.
    MBBI = prior(MBB.end());
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust the stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for a possible merge with a preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64)
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (RetOpcode == X86::TCRETURNri64)
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = prior(MBB.end());

    // Check for a possible merge with a preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
  }
}

unsigned X86RegisterInfo::getRARegister() const {
  if (Is64Bit)
    return X86::RIP;  // Should have DWARF #16.
  else
    return X86::EIP;  // Should have DWARF #8.
}

unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
  return hasFP(MF) ? FramePtr : StackPtr;
}

void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
  const {
  // Calculate the number of bytes used for return address storage.
  int stackGrowth = (Is64Bit ? -8 : -4);

  // Initial state of the frame pointer is esp+4 (rsp+8 on x86-64).
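  // (On entry the CFA sits one slot beyond the stack pointer because the call
  //  instruction has already pushed the return address; the second move below
  //  records that the return address lives in that slot.)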
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(StackPtr, stackGrowth);
  Moves.push_back(MachineMove(0, Dst, Src));

  // Add the return address to the move list.
  MachineLocation CSDst(StackPtr, stackGrowth);
  MachineLocation CSSrc(getRARegister());
  Moves.push_back(MachineMove(0, CSDst, CSSrc));
}

unsigned X86RegisterInfo::getEHExceptionRegister() const {
  llvm_unreachable("What is the exception register");
  return 0;
}

unsigned X86RegisterInfo::getEHHandlerRegister() const {
  llvm_unreachable("What is the exception handler register");
  return 0;
}

namespace llvm {
unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
  switch (VT.getSimpleVT().SimpleTy) {
  default: return Reg;
  case MVT::i8:
    if (High) {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AH;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DH;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CH;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BH;
      }
    } else {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AL;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DL;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CL;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BL;
      case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
        return X86::SIL;
      case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
        return X86::DIL;
      case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
        return X86::BPL;
      case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
        return X86::SPL;
      case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
        return X86::R8B;
      case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
        return X86::R9B;
      case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
        return X86::R10B;
      case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
        return X86::R11B;
      case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
        return X86::R12B;
      case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
        return X86::R13B;
      case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
        return X86::R14B;
      case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
        return X86::R15B;
      }
    }
  case MVT::i16:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::AX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::DX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::CX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::BX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::SI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::DI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::BP;
    case X86::SPL:
    case X86::SP: case X86::ESP: case X86::RSP:
      return X86::SP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8W;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9W;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10W;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11W;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12W;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13W;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14W;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15W;
    }
  case MVT::i32:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::EAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::EDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::ECX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::EBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::ESI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::EDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::EBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::ESP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8D;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9D;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10D;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11D;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12D;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13D;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14D;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15D;
    }
  case MVT::i64:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::RAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::RDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::RCX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::RBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::RSI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::RDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::RBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::RSP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15;
    }
  }

  return Reg;
}
}

#include "X86GenRegisterInfo.inc"

namespace {
  struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass {
    static char ID;
    MSAC() : MachineFunctionPass(&ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      MachineFrameInfo *FFI = MF.getFrameInfo();
      MachineRegisterInfo &RI = MF.getRegInfo();

      // Calculate the max stack alignment of all already allocated stack
      // objects.
      unsigned MaxAlign = calculateMaxStackAlignment(FFI);

      // Be over-conservative: scan over all vreg defs and find whether vector
      // registers are used. If they are, there is a chance that a vector
      // register will be spilled, and the stack then needs to be aligned
      // properly.
      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
           RegNum < RI.getLastVirtReg(); ++RegNum)
        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());

      if (FFI->getMaxAlignment() == MaxAlign)
        return false;

      FFI->setMaxAlignment(MaxAlign);
      return true;
    }

    virtual const char *getPassName() const {
      return "X86 Maximal Stack Alignment Calculator";
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };

  char MSAC::ID = 0;
}

FunctionPass*
llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }