X86RegisterInfo.cpp revision 72852a8cfb605056d87b644d2e36b1346051413d
//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                 const TargetInstrInfo &tii)
  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKDOWN64 :
                         X86::ADJCALLSTACKDOWN32,
                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKUP64 :
                         X86::ADJCALLSTACKUP32),
    TM(tm), TII(tii) {
  // Cache some information.
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  Is64Bit = Subtarget->is64Bit();
  IsWin64 = Subtarget->isTargetWin64();
  StackAlign = TM.getFrameInfo()->getStackAlignment();

  if (Is64Bit) {
    SlotSize = 8;
    StackPtr = X86::RSP;
    FramePtr = X86::RBP;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
  }
}

/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
/// specific numbering, used in debug info and exception tables.
int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  unsigned Flavour = DWARFFlavour::X86_64;

  if (!Subtarget->is64Bit()) {
    if (Subtarget->isTargetDarwin()) {
      if (isEH)
        Flavour = DWARFFlavour::X86_32_DarwinEH;
      else
        Flavour = DWARFFlavour::X86_32_Generic;
    } else if (Subtarget->isTargetCygMing()) {
      // Not supported yet; fall back to the generic numbering for now.
      Flavour = DWARFFlavour::X86_32_Generic;
    } else {
      Flavour = DWARFFlavour::X86_32_Generic;
    }
  }

  return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}

/// getX86RegNum - This function maps LLVM register identifiers to their X86
/// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
  switch(RegNo) {
  case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
  case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
  case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
  case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
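  // Without a REX prefix, ModRM register numbers 4-7 select AH, CH, DH and
  // BH; with a REX prefix they select SPL, BPL, SIL and DIL. Both sets
  // therefore share the encodings returned below.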
  case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
    return N86::ESP;
  case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
    return N86::EBP;
  case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
    return N86::ESI;
  case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
    return N86::EDI;

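  // R8-R15 share the low three encoding bits with EAX-EDI; the fourth bit is
  // carried by the REX prefix, which is emitted separately.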
  case X86::R8:  case X86::R8D:  case X86::R8W:  case X86::R8B:
    return N86::EAX;
  case X86::R9:  case X86::R9D:  case X86::R9W:  case X86::R9B:
    return N86::ECX;
  case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
    return N86::EDX;
  case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
    return N86::EBX;
  case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
    return N86::ESP;
  case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
    return N86::EBP;
  case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
    return N86::ESI;
  case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
    return N86::EDI;

  case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
  case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
    return RegNo-X86::ST0;

  case X86::XMM0: case X86::XMM8:
  case X86::YMM0: case X86::YMM8: case X86::MM0:
    return 0;
  case X86::XMM1: case X86::XMM9:
  case X86::YMM1: case X86::YMM9: case X86::MM1:
    return 1;
  case X86::XMM2: case X86::XMM10:
  case X86::YMM2: case X86::YMM10: case X86::MM2:
    return 2;
  case X86::XMM3: case X86::XMM11:
  case X86::YMM3: case X86::YMM11: case X86::MM3:
    return 3;
  case X86::XMM4: case X86::XMM12:
  case X86::YMM4: case X86::YMM12: case X86::MM4:
    return 4;
  case X86::XMM5: case X86::XMM13:
  case X86::YMM5: case X86::YMM13: case X86::MM5:
    return 5;
  case X86::XMM6: case X86::XMM14:
  case X86::YMM6: case X86::YMM14: case X86::MM6:
    return 6;
  case X86::XMM7: case X86::XMM15:
  case X86::YMM7: case X86::YMM15: case X86::MM7:
    return 7;

  case X86::ES:
    return 0;
  case X86::CS:
    return 1;
  case X86::SS:
    return 2;
  case X86::DS:
    return 3;
  case X86::FS:
    return 4;
  case X86::GS:
    return 5;

  case X86::CR0:
    return 0;
  case X86::CR1:
    return 1;
  case X86::CR2:
    return 2;
  case X86::CR3:
    return 3;
  case X86::CR4:
    return 4;

  case X86::DR0:
    return 0;
  case X86::DR1:
    return 1;
  case X86::DR2:
    return 2;
  case X86::DR3:
    return 3;
  case X86::DR4:
    return 4;
  case X86::DR5:
    return 5;
  case X86::DR6:
    return 6;
  case X86::DR7:
    return 7;

  default:
    assert(isVirtualRegister(RegNo) && "Unknown physical register!");
    llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
    return 0;
  }
}

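/// getMatchingSuperRegClass - Given register classes A and B and sub-register
/// index SubIdx, return a subclass of A whose every register has a SubIdx
/// sub-register in class B, or null if no such class exists.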
const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  switch (SubIdx) {
  default: return 0;
  case X86::sub_8bit:
    if (B == &X86::GR8RegClass) {
      if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
        return A;
    } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
    } else if (B == &X86::GR8_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_NOREXRegClass;
      else if (A == &X86::GR32_ABCDRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_NOREXRegClass;
      else if (A == &X86::GR16_ABCDRegClass)
        return &X86::GR16_ABCDRegClass;
    }
    break;
  case X86::sub_8bit_hi:
    if (B == &X86::GR8_ABCD_HRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
    }
    break;
  case X86::sub_16bit:
    if (B == &X86::GR16RegClass) {
      if (A->getSize() == 4 || A->getSize() == 8)
        return A;
    } else if (B == &X86::GR16_ABCDRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
    } else if (B == &X86::GR16_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_NOREXRegClass;
      else if (A == &X86::GR32_ABCDRegClass)
        return &X86::GR32_ABCDRegClass;
    }
    break;
  case X86::sub_32bit:
    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
      if (A->getSize() == 8)
        return A;
    } else if (B == &X86::GR32_ABCDRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
    } else if (B == &X86::GR32_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
    }
    break;
  case X86::sub_ss:
    if (B == &X86::FR32RegClass)
      return A;
    break;
  case X86::sub_sd:
    if (B == &X86::FR64RegClass)
      return A;
    break;
  case X86::sub_xmm:
    if (B == &X86::VR128RegClass)
      return A;
    break;
  }
  return 0;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return &X86::GR64RegClass;
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return &X86::GR64_NOSPRegClass;
    return &X86::GR32_NOSPRegClass;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return NULL;
}

const unsigned *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  bool callsEHReturn = false;
  bool ghcCall = false;

  if (MF) {
    callsEHReturn = MF->getMMI().callsEHReturn();
    const Function *F = MF->getFunction();
    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
  }

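  // All of the register lists below are terminated by a 0 sentinel.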
  static const unsigned GhcCalleeSavedRegs[] = {
    0
  };

  static const unsigned CalleeSavedRegs32Bit[] = {
    X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
  };

  static const unsigned CalleeSavedRegs32EHRet[] = {
    X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
  };

  static const unsigned CalleeSavedRegs64Bit[] = {
    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  static const unsigned CalleeSavedRegs64EHRet[] = {
    X86::RAX, X86::RDX, X86::RBX, X86::R12,
    X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  static const unsigned CalleeSavedRegsWin64[] = {
    X86::RBX,   X86::RBP,   X86::RDI,   X86::RSI,
    X86::R12,   X86::R13,   X86::R14,   X86::R15,
    X86::XMM6,  X86::XMM7,  X86::XMM8,  X86::XMM9,
    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
    X86::XMM14, X86::XMM15, 0
  };

  if (ghcCall) {
    return GhcCalleeSavedRegs;
  } else if (Is64Bit) {
    if (IsWin64)
      return CalleeSavedRegsWin64;
    else
      return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
  } else {
    return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
  }
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  // Set the stack-pointer register and its aliases as reserved.
  Reserved.set(X86::RSP);
  Reserved.set(X86::ESP);
  Reserved.set(X86::SP);
  Reserved.set(X86::SPL);

  // Set the instruction pointer register and its aliases as reserved.
  Reserved.set(X86::RIP);
  Reserved.set(X86::EIP);
  Reserved.set(X86::IP);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (hasFP(MF)) {
    Reserved.set(X86::RBP);
    Reserved.set(X86::EBP);
    Reserved.set(X86::BP);
    Reserved.set(X86::BPL);
  }

  // Mark the x87 stack registers as reserved, since they don't behave normally
  // with respect to liveness. We don't fully model the effects of x87 stack
  // pushes and pops after stackification.
  Reserved.set(X86::ST0);
  Reserved.set(X86::ST1);
  Reserved.set(X86::ST2);
  Reserved.set(X86::ST3);
  Reserved.set(X86::ST4);
  Reserved.set(X86::ST5);
  Reserved.set(X86::ST6);
  Reserved.set(X86::ST7);
  return Reserved;
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.  This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();

  return (DisableFramePointerElim(MF) ||
          needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit());
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return (RealignStack &&
          !MFI->hasVarSizedObjects());
}

bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *F = MF.getFunction();
  bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                               F->hasFnAttr(Attribute::StackAlignment));

  // FIXME: Currently we don't support stack realignment for functions with
  //        variable-sized allocas.
  // FIXME: It's more complicated than this...
  if (0 && requiresRealignment && MFI->hasVarSizedObjects())
    report_fatal_error(
      "Stack realignment in presence of dynamic allocas is not supported");

  return requiresRealignment && canRealignStack(MF);
}

bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
                                           unsigned Reg, int &FrameIdx) const {
  if (Reg == FramePtr && hasFP(MF)) {
    FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
    return true;
  }
  return false;
}

int
X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      Offset += SlotSize;
    } else {
      unsigned Align = MFI->getObjectAlignment(FI);
      assert((-(Offset + StackSize)) % Align == 0);
      Align = 0;
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}

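// Pick the ADD/SUB opcode with the smallest immediate encoding: the 8-bit
// immediate forms are sign-extended, so values in [-128, 127] can use ri8.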
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
  if (is64Bit) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
  if (is64Bit) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after the prologue, turn the
    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
    // adjcallstackup instruction into 'add ESP, <amt>'.
    // TODO: consider using push / pop instead of sub + store / add
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
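      // For example, with StackAlign == 16, Amount == 20 rounds up to 32.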
      Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(getSUBriOpcode(Is64Bit, Amount)),
                      StackPtr)
          .addReg(StackPtr)
          .addImm(Amount);
      } else {
        assert(Old->getOpcode() == getCallFrameDestroyOpcode());

        // Factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;

        if (Amount) {
          unsigned Opc = getADDriOpcode(Is64Bit, Amount);
          New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
            .addReg(StackPtr)
            .addImm(Amount);
        }
      }

      if (New) {
        // The EFLAGS implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction.
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.  We do this until we have
    // more advanced stack pointer tracking ability.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
                StackPtr)
          .addReg(StackPtr)
          .addImm(CalleeAmt);

      // The EFLAGS implicit def is dead.
      New->getOperand(3).setIsDead();
      MBB.insert(I, New);
    }
  }

  MBB.erase(I);
}

unsigned
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, FrameIndexValue *Value,
                                     RegScavenger *RS) const {
  assert(SPAdj == 0 && "Unexpected");

  unsigned i = 0;
  MachineInstr &MI = *II;
  MachineFunction &MF = *MI.getParent()->getParent();

  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
  }

  int FrameIndex = MI.getOperand(i).getIndex();
  unsigned BasePtr;

  unsigned Opc = MI.getOpcode();
  bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
  if (needsStackRealignment(MF))
    BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
  else if (AfterFPPop)
    BasePtr = StackPtr;
  else
    BasePtr = (hasFP(MF) ? FramePtr : StackPtr);

  // This must be part of a memory reference.  Replace the FrameIndex with the
  // base register, then fold the frame object offset into the displacement.
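  // X86 memory operands are laid out as {Base, Scale, Index, Displacement,
  // Segment}, so operand i is the base register and i+3 the displacement.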
  MI.getOperand(i).ChangeToRegister(BasePtr, false);

  // Now add the frame object offset to the offset from EBP.
  int FIOffset;
  if (AfterFPPop) {
    // Tail call jmp happens after FP is popped.
    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
  } else
    FIOffset = getFrameIndexOffset(MF, FrameIndex);

  if (MI.getOperand(i+3).isImm()) {
    // Offset is a 32-bit integer.
    int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
    MI.getOperand(i + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
    MI.getOperand(i+3).setOffset(Offset);
  }
  return 0;
}

void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                      RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           (-1U*SlotSize)+TailCallReturnAddrDelta, true);
  }

  if (hasFP(MF)) {
    assert((TailCallReturnAddrDelta <= 0) &&
           "The Delta should always be zero or negative");
    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();

    // Create a frame entry for the EBP register that must be saved.
    int FrameIdx = MFI->CreateFixedObject(SlotSize,
                                          -(int)SlotSize +
                                          TFI.getOffsetOfLocalArea() +
                                          TailCallReturnAddrDelta,
                                          true);
    assert(FrameIdx == MFI->getObjectIndexBegin() &&
           "Slot for EBP register must be last in order to be found!");
    FrameIdx = 0;
  }
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
                  const TargetInstrInfo &TII) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc = isSub ?
    getSUBriOpcode(Is64Bit, Offset) :
    getADDriOpcode(Is64Bit, Offset);
  uint64_t Chunk = (1LL << 31) - 1;
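  // INT32_MAX is the largest adjustment a single ADD/SUB immediate can
  // express, so larger updates are split across several instructions below.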
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr)
        .addImm(ThisVal);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    Offset -= ThisVal;
  }
}

/// mergeSPUpdatesUp - If the instruction immediately before MBBI is a stack
/// pointer adjustment, fold it into *NumBytes and erase it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdatesDown - If the instruction immediately after MBBI is a stack
/// pointer adjustment, fold it into *NumBytes and erase it.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB instruction on the stack pointer, it is
/// deleted, and the stack adjustment is returned as a positive value for ADD
/// and a negative value for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI,
                           unsigned StackPtr,
                           bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr){
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                MCSymbol *Label,
                                                unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate the number of bytes used for return address storage.
  int stackGrowth =
    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
     TargetFrameInfo::StackGrowsUp ?
     TD->getPointerSize() : -TD->getPointerSize());

  // FIXME: This is a dirty hack. The code itself is a mess right now.
  // It should be rewritten from scratch and generalized at some point.

  // Determine the maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    //        another bug. I.e., one where we generate a prolog like this:
    //
    //          pushl  %ebp
    //          movl   %esp, %ebp
    //          pushl  %ebp
    //          pushl  %esi
    //           ...
    //
    //        The immediate re-push of EBP is unnecessary. At the least, it's an
    //        optimization bug. EBP can be used as a scratch register in certain
    //        cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
  }
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit the labels used by the
/// exception handler to generate the exception handling frames.
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() ||
                          !Fn->doesNotThrow() || UnwindTablesMandatory;
  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  DebugLoc DL;

  // Add the RETADDR move area to the callee-saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64, the Red Zone is not disabled, and this is a leaf
  // function using at most 128 bytes of stack space with no frame pointer,
  // calls, or dynamic allocas, then we do not need to adjust the stack
  // pointer (we fit in the Red Zone).
  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
      !needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->adjustsStack() &&                      // No calls.
      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  } else if (Subtarget->isTargetWin64()) {
    // We need to always allocate 32 bytes as register spill area.
    // FIXME: We might reuse these 32 bytes for leaf functions.
    StackSize += 32;
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  uint64_t NumBytes = 0;
  int stackGrowth = -TD->getPointerSize();
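  // The stack grows down, so stackGrowth is negative and the CFA offsets
  // recorded below are negative multiples of the pointer size.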

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill);

    if (needsFrameMoves) {
      // Mark the place where EBP/RBP was saved.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      } else {
        // FIXME: Verify & implement for FP
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Update EBP with the new base value...
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
        .addReg(StackPtr);

    if (needsFrameMoves) {
      // Mark effective beginning of when frame pointer becomes valid.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign stack
    if (needsStackRealignment(MF)) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
                StackPtr).addReg(StackPtr).addImm(-MaxAlign);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    ++MBBI;

    if (!HasFP && needsFrameMoves) {
      // Mark callee-saved push instruction.
      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

      // Define the current CFA rule to use the provided offset.
      unsigned Ptr = StackSize ?
        MachineLocation::VirtualFP : StackPtr;
      MachineLocation SPDst(Ptr);
      MachineLocation SPSrc(Ptr, StackOffset);
      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      StackOffset += stackGrowth;
    }
  }

  DL = MBB.findDebugLoc(MBBI);

  // Adjust stack pointer: ESP -= numbytes.
  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
    // Check whether EAX is live into this function.
    bool isEAXAlive = false;
    for (MachineRegisterInfo::livein_iterator
           II = MF.getRegInfo().livein_begin(),
           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
      unsigned Reg = II->first;
      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
                    Reg == X86::AH || Reg == X86::AL);
    }

    // The function prologue calls _alloca to probe the stack when allocating
    // more than 4k bytes in one go. Touching the stack at 4K increments is
    // necessary to ensure that the guard pages used by the OS virtual memory
    // manager are allocated in the correct sequence.
    if (!isEAXAlive) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca")
        .addReg(StackPtr, RegState::Define | RegState::Implicit);
    } else {
      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill);

      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
      // allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes - 4);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca")
        .addReg(StackPtr, RegState::Define | RegState::Implicit);

      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    // If there is a SUB32ri of ESP immediately before this instruction, merge
    // the two. This can be the case when tail call elimination is enabled and
    // the callee has more arguments than the caller.
    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

    // If there is an ADD32ri or SUB32ri of ESP immediately after this
    // instruction, merge the two instructions.
    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

    if (NumBytes)
      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
  }

  if ((NumBytes || PushedRegs) && needsFrameMoves) {
    // Mark end of stack pointer adjustment.
    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              -StackSize + stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      } else {
        // FIXME: Verify & implement for FP
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      }
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
  }
}

void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign  = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r &&
        !PI->getDesc().isTerminator())
      break;

    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset ESP to point to the last
  // callee-saved slot before popping them off! The same applies when the
  // stack was realigned.
  if (needsStackRealignment(MF)) {
    // We cannot use LEA here, because the stack pointer was realigned. We
    // need to deallocate the local frame back.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
      MBBI = prior(LastCSPop);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI =
        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                     FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else {
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
  }

  // We're returning from the function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = prior(MBB.end());
    MachineOperand &DestAddr  = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = prior(MBB.end());
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj-MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for a possible merge with the preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                         JumpTarget.getTargetFlags());
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = prior(MBB.end());

    // Check for a possible merge with the preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
  }
}

unsigned X86RegisterInfo::getRARegister() const {
  return Is64Bit ? X86::RIP     // Should have dwarf #16.
                 : X86::EIP;    // Should have dwarf #8.
}

unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  return hasFP(MF) ? FramePtr : StackPtr;
}

void
X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
  // Calculate the number of bytes used for return address storage.
  int stackGrowth = (Is64Bit ? -8 : -4);

  // Initial state of the frame pointer is esp+stackGrowth.
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(StackPtr, stackGrowth);
  Moves.push_back(MachineMove(0, Dst, Src));

  // Add the return address to the move list.
  MachineLocation CSDst(StackPtr, stackGrowth);
  MachineLocation CSSrc(getRARegister());
  Moves.push_back(MachineMove(0, CSDst, CSSrc));
}

unsigned X86RegisterInfo::getEHExceptionRegister() const {
  llvm_unreachable("What is the exception register");
  return 0;
}

unsigned X86RegisterInfo::getEHHandlerRegister() const {
  llvm_unreachable("What is the exception handler register");
  return 0;
}

namespace llvm {
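/// getX86SubSuperRegister - Return the sub- or super-register of Reg that
/// matches the value type VT. For MVT::i8, High selects the high 8-bit
/// register (AH/BH/CH/DH) where one exists, and 0 is returned otherwise.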
unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
  switch (VT.getSimpleVT().SimpleTy) {
  default: return Reg;
  case MVT::i8:
    if (High) {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AH;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DH;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CH;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BH;
      }
    } else {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AL;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DL;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CL;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BL;
      case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
        return X86::SIL;
      case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
        return X86::DIL;
      case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
        return X86::BPL;
      case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
        return X86::SPL;
      case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
        return X86::R8B;
      case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
        return X86::R9B;
      case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
        return X86::R10B;
      case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
        return X86::R11B;
      case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
        return X86::R12B;
      case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
        return X86::R13B;
      case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
        return X86::R14B;
      case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
        return X86::R15B;
      }
    }
  case MVT::i16:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::AX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::DX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::CX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::BX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::SI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::DI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::BP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::SP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8W;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9W;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10W;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11W;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12W;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13W;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14W;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15W;
    }
  case MVT::i32:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::EAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::EDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::ECX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::EBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::ESI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::EDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::EBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::ESP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8D;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9D;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10D;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11D;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12D;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13D;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14D;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15D;
    }
  case MVT::i64:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::RAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::RDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::RCX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::RBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::RSI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::RDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::RBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::RSP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15;
    }
  }

  return Reg;
}
}

#include "X86GenRegisterInfo.inc"

namespace {
  struct MSAH : public MachineFunctionPass {
    static char ID;
    MSAH() : MachineFunctionPass(&ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      const X86TargetMachine *TM =
        static_cast<const X86TargetMachine *>(&MF.getTarget());
      const X86RegisterInfo *X86RI = TM->getRegisterInfo();
      MachineRegisterInfo &RI = MF.getRegInfo();
      X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
      unsigned StackAlignment = X86RI->getStackAlignment();

      // Be over-conservative: scan over all vreg defs and find whether vector
      // registers are used. If so, a vector register may be spilled, which
      // would require dynamic stack realignment.
      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
           RegNum < RI.getLastVirtReg(); ++RegNum)
        if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
          FuncInfo->setReserveFP(true);
          return true;
        }

      // Nothing to do.
      return false;
    }

    virtual const char *getPassName() const {
      return "X86 Maximal Stack Alignment Check";
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };

  char MSAH::ID = 0;
}

FunctionPass*
llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }