X86RegisterInfo.cpp revision 22c0e97c567dd15c819906035d15c745100e75bc
1//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the X86 implementation of the TargetRegisterInfo class.
11// This file is responsible for the frame pointer elimination optimization
12// on X86.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86RegisterInfo.h"
18#include "X86InstrBuilder.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/Constants.h"
23#include "llvm/Function.h"
24#include "llvm/Type.h"
25#include "llvm/CodeGen/ValueTypes.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineLocation.h"
31#include "llvm/CodeGen/MachineModuleInfo.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/Target/TargetFrameInfo.h"
35#include "llvm/Target/TargetInstrInfo.h"
36#include "llvm/Target/TargetMachine.h"
37#include "llvm/Target/TargetOptions.h"
38#include "llvm/ADT/BitVector.h"
39#include "llvm/ADT/STLExtras.h"
40#include "llvm/Support/ErrorHandling.h"
41using namespace llvm;
42
/// X86RegisterInfo constructor - Select the call-frame setup/destroy pseudo
/// opcodes appropriate for the subtarget's pointer width, then cache the
/// subtarget facts (64-bit-ness, Win64, stack alignment) and the derived
/// slot size and stack/frame pointer registers used throughout this file.
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                 const TargetInstrInfo &tii)
  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKDOWN64 :
                         X86::ADJCALLSTACKDOWN32,
                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKUP64 :
                         X86::ADJCALLSTACKUP32),
    TM(tm), TII(tii) {
  // Cache some information.
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  Is64Bit = Subtarget->is64Bit();
  IsWin64 = Subtarget->isTargetWin64();
  StackAlign = TM.getFrameInfo()->getStackAlignment();

  // Slot size and the stack/frame pointer registers follow directly from the
  // pointer width of the subtarget.
  if (Is64Bit) {
    SlotSize = 8;
    StackPtr = X86::RSP;
    FramePtr = X86::RBP;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
  }
}
68
69/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
70/// specific numbering, used in debug info and exception tables.
71int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
72  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
73  unsigned Flavour = DWARFFlavour::X86_64;
74
75  if (!Subtarget->is64Bit()) {
76    if (Subtarget->isTargetDarwin()) {
77      if (isEH)
78        Flavour = DWARFFlavour::X86_32_DarwinEH;
79      else
80        Flavour = DWARFFlavour::X86_32_Generic;
81    } else if (Subtarget->isTargetCygMing()) {
82      // Unsupported by now, just quick fallback
83      Flavour = DWARFFlavour::X86_32_Generic;
84    } else {
85      Flavour = DWARFFlavour::X86_32_Generic;
86    }
87  }
88
89  return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
90}
91
/// getX86RegNum - This function maps LLVM register identifiers to their X86
/// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
  switch(RegNo) {
  case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
  case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
  case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
  case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
  // The legacy high-byte registers AH/CH/DH/BH share encodings 4-7 with
  // SPL/BPL/SIL/DIL, so they are grouped with the SP/BP/SI/DI families here.
  case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
    return N86::ESP;
  case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
    return N86::EBP;
  case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
    return N86::ESI;
  case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
    return N86::EDI;

  // R8-R15 (and their sub-registers) reuse encodings 0-7; the REX prefix
  // emitted elsewhere distinguishes them from the legacy registers.
  case X86::R8:  case X86::R8D:  case X86::R8W:  case X86::R8B:
    return N86::EAX;
  case X86::R9:  case X86::R9D:  case X86::R9W:  case X86::R9B:
    return N86::ECX;
  case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
    return N86::EDX;
  case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
    return N86::EBX;
  case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
    return N86::ESP;
  case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
    return N86::EBP;
  case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
    return N86::ESI;
  case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
    return N86::EDI;

  // x87 stack registers encode as their index within the ST register stack.
  case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
  case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
    return RegNo-X86::ST0;

  // XMM0-7, XMM8-15 and MM0-7 all encode as 0-7; XMM8-15 additionally need a
  // REX prefix, handled elsewhere.
  case X86::XMM0: case X86::XMM8: case X86::MM0:
    return 0;
  case X86::XMM1: case X86::XMM9: case X86::MM1:
    return 1;
  case X86::XMM2: case X86::XMM10: case X86::MM2:
    return 2;
  case X86::XMM3: case X86::XMM11: case X86::MM3:
    return 3;
  case X86::XMM4: case X86::XMM12: case X86::MM4:
    return 4;
  case X86::XMM5: case X86::XMM13: case X86::MM5:
    return 5;
  case X86::XMM6: case X86::XMM14: case X86::MM6:
    return 6;
  case X86::XMM7: case X86::XMM15: case X86::MM7:
    return 7;

  default:
    // Only physical registers have a fixed hardware encoding.
    assert(isVirtualRegister(RegNo) && "Unknown physical register!");
    llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
    return 0;
  }
}
153
154const TargetRegisterClass *
155X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
156                                          const TargetRegisterClass *B,
157                                          unsigned SubIdx) const {
158  switch (SubIdx) {
159  default: return 0;
160  case X86::sub_8bit:
161  //case X86::sub_ss:
162    if (B == &X86::GR8RegClass) {
163      if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
164        return A;
165    } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
166      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
167          A == &X86::GR64_NOREXRegClass ||
168          A == &X86::GR64_NOSPRegClass ||
169          A == &X86::GR64_NOREX_NOSPRegClass)
170        return &X86::GR64_ABCDRegClass;
171      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
172               A == &X86::GR32_NOREXRegClass ||
173               A == &X86::GR32_NOSPRegClass)
174        return &X86::GR32_ABCDRegClass;
175      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
176               A == &X86::GR16_NOREXRegClass)
177        return &X86::GR16_ABCDRegClass;
178    } else if (B == &X86::GR8_NOREXRegClass) {
179      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
180          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
181        return &X86::GR64_NOREXRegClass;
182      else if (A == &X86::GR64_ABCDRegClass)
183        return &X86::GR64_ABCDRegClass;
184      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
185               A == &X86::GR32_NOSPRegClass)
186        return &X86::GR32_NOREXRegClass;
187      else if (A == &X86::GR32_ABCDRegClass)
188        return &X86::GR32_ABCDRegClass;
189      else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
190        return &X86::GR16_NOREXRegClass;
191      else if (A == &X86::GR16_ABCDRegClass)
192        return &X86::GR16_ABCDRegClass;
193    } else if (B == &X86::FR32RegClass) {
194      return A;
195    }
196    break;
197  case X86::sub_8bit_hi:
198  //case X86::sub_sd:
199    if (B == &X86::GR8_ABCD_HRegClass) {
200      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
201          A == &X86::GR64_NOREXRegClass ||
202          A == &X86::GR64_NOSPRegClass ||
203          A == &X86::GR64_NOREX_NOSPRegClass)
204        return &X86::GR64_ABCDRegClass;
205      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
206               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
207        return &X86::GR32_ABCDRegClass;
208      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
209               A == &X86::GR16_NOREXRegClass)
210        return &X86::GR16_ABCDRegClass;
211    } else if (B == &X86::FR64RegClass) {
212      return A;
213    }
214    break;
215  case X86::sub_16bit:
216  //case X86::sub_xmm:
217    if (B == &X86::GR16RegClass) {
218      if (A->getSize() == 4 || A->getSize() == 8)
219        return A;
220    } else if (B == &X86::GR16_ABCDRegClass) {
221      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
222          A == &X86::GR64_NOREXRegClass ||
223          A == &X86::GR64_NOSPRegClass ||
224          A == &X86::GR64_NOREX_NOSPRegClass)
225        return &X86::GR64_ABCDRegClass;
226      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
227               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
228        return &X86::GR32_ABCDRegClass;
229    } else if (B == &X86::GR16_NOREXRegClass) {
230      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
231          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
232        return &X86::GR64_NOREXRegClass;
233      else if (A == &X86::GR64_ABCDRegClass)
234        return &X86::GR64_ABCDRegClass;
235      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
236               A == &X86::GR32_NOSPRegClass)
237        return &X86::GR32_NOREXRegClass;
238      else if (A == &X86::GR32_ABCDRegClass)
239        return &X86::GR64_ABCDRegClass;
240    } else if (B == &X86::VR128RegClass) {
241      return A;
242    }
243    break;
244  case X86::sub_32bit:
245    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
246      if (A->getSize() == 8)
247        return A;
248    } else if (B == &X86::GR32_ABCDRegClass) {
249      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
250          A == &X86::GR64_NOREXRegClass ||
251          A == &X86::GR64_NOSPRegClass ||
252          A == &X86::GR64_NOREX_NOSPRegClass)
253        return &X86::GR64_ABCDRegClass;
254    } else if (B == &X86::GR32_NOREXRegClass) {
255      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
256          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
257        return &X86::GR64_NOREXRegClass;
258      else if (A == &X86::GR64_ABCDRegClass)
259        return &X86::GR64_ABCDRegClass;
260    }
261    break;
262  }
263  return 0;
264}
265
266const TargetRegisterClass *
267X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
268  switch (Kind) {
269  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
270  case 0: // Normal GPRs.
271    if (TM.getSubtarget<X86Subtarget>().is64Bit())
272      return &X86::GR64RegClass;
273    return &X86::GR32RegClass;
274  case 1: // Normal GRPs except the stack pointer (for encoding reasons).
275    if (TM.getSubtarget<X86Subtarget>().is64Bit())
276      return &X86::GR64_NOSPRegClass;
277    return &X86::GR32_NOSPRegClass;
278  }
279}
280
281const TargetRegisterClass *
282X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
283  if (RC == &X86::CCRRegClass) {
284    if (Is64Bit)
285      return &X86::GR64RegClass;
286    else
287      return &X86::GR32RegClass;
288  }
289  return NULL;
290}
291
292const unsigned *
293X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
294  bool callsEHReturn = false;
295  bool ghcCall = false;
296
297  if (MF) {
298    callsEHReturn = MF->getMMI().callsEHReturn();
299    const Function *F = MF->getFunction();
300    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
301  }
302
303  static const unsigned GhcCalleeSavedRegs[] = {
304    0
305  };
306
307  static const unsigned CalleeSavedRegs32Bit[] = {
308    X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
309  };
310
311  static const unsigned CalleeSavedRegs32EHRet[] = {
312    X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
313  };
314
315  static const unsigned CalleeSavedRegs64Bit[] = {
316    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
317  };
318
319  static const unsigned CalleeSavedRegs64EHRet[] = {
320    X86::RAX, X86::RDX, X86::RBX, X86::R12,
321    X86::R13, X86::R14, X86::R15, X86::RBP, 0
322  };
323
324  static const unsigned CalleeSavedRegsWin64[] = {
325    X86::RBX,   X86::RBP,   X86::RDI,   X86::RSI,
326    X86::R12,   X86::R13,   X86::R14,   X86::R15,
327    X86::XMM6,  X86::XMM7,  X86::XMM8,  X86::XMM9,
328    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
329    X86::XMM14, X86::XMM15, 0
330  };
331
332  if (ghcCall) {
333    return GhcCalleeSavedRegs;
334  } else if (Is64Bit) {
335    if (IsWin64)
336      return CalleeSavedRegsWin64;
337    else
338      return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
339  } else {
340    return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
341  }
342}
343
344const TargetRegisterClass* const*
345X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
346  bool callsEHReturn = false;
347  if (MF)
348    callsEHReturn = MF->getMMI().callsEHReturn();
349
350  static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
351    &X86::GR32RegClass, &X86::GR32RegClass,
352    &X86::GR32RegClass, &X86::GR32RegClass,  0
353  };
354  static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
355    &X86::GR32RegClass, &X86::GR32RegClass,
356    &X86::GR32RegClass, &X86::GR32RegClass,
357    &X86::GR32RegClass, &X86::GR32RegClass,  0
358  };
359  static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
360    &X86::GR64RegClass, &X86::GR64RegClass,
361    &X86::GR64RegClass, &X86::GR64RegClass,
362    &X86::GR64RegClass, &X86::GR64RegClass, 0
363  };
364  static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
365    &X86::GR64RegClass, &X86::GR64RegClass,
366    &X86::GR64RegClass, &X86::GR64RegClass,
367    &X86::GR64RegClass, &X86::GR64RegClass,
368    &X86::GR64RegClass, &X86::GR64RegClass, 0
369  };
370  static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
371    &X86::GR64RegClass,  &X86::GR64RegClass,
372    &X86::GR64RegClass,  &X86::GR64RegClass,
373    &X86::GR64RegClass,  &X86::GR64RegClass,
374    &X86::GR64RegClass,  &X86::GR64RegClass,
375    &X86::VR128RegClass, &X86::VR128RegClass,
376    &X86::VR128RegClass, &X86::VR128RegClass,
377    &X86::VR128RegClass, &X86::VR128RegClass,
378    &X86::VR128RegClass, &X86::VR128RegClass,
379    &X86::VR128RegClass, &X86::VR128RegClass, 0
380  };
381
382  if (Is64Bit) {
383    if (IsWin64)
384      return CalleeSavedRegClassesWin64;
385    else
386      return (callsEHReturn ?
387              CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
388  } else {
389    return (callsEHReturn ?
390            CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
391  }
392}
393
394BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
395  BitVector Reserved(getNumRegs());
396  // Set the stack-pointer register and its aliases as reserved.
397  Reserved.set(X86::RSP);
398  Reserved.set(X86::ESP);
399  Reserved.set(X86::SP);
400  Reserved.set(X86::SPL);
401
402  // Set the instruction pointer register and its aliases as reserved.
403  Reserved.set(X86::RIP);
404  Reserved.set(X86::EIP);
405  Reserved.set(X86::IP);
406
407  // Set the frame-pointer register and its aliases as reserved if needed.
408  if (hasFP(MF)) {
409    Reserved.set(X86::RBP);
410    Reserved.set(X86::EBP);
411    Reserved.set(X86::BP);
412    Reserved.set(X86::BPL);
413  }
414
415  // Mark the x87 stack registers as reserved, since they don't behave normally
416  // with respect to liveness. We don't fully model the effects of x87 stack
417  // pushes and pops after stackification.
418  Reserved.set(X86::ST0);
419  Reserved.set(X86::ST1);
420  Reserved.set(X86::ST2);
421  Reserved.set(X86::ST3);
422  Reserved.set(X86::ST4);
423  Reserved.set(X86::ST5);
424  Reserved.set(X86::ST6);
425  Reserved.set(X86::ST7);
426  return Reserved;
427}
428
429//===----------------------------------------------------------------------===//
430// Stack Frame Processing methods
431//===----------------------------------------------------------------------===//
432
433/// hasFP - Return true if the specified function should have a dedicated frame
434/// pointer register.  This is true if the function has variable sized allocas
435/// or if frame pointer elimination is disabled.
436bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
437  const MachineFrameInfo *MFI = MF.getFrameInfo();
438  const MachineModuleInfo &MMI = MF.getMMI();
439
440  return (DisableFramePointerElim(MF) ||
441          needsStackRealignment(MF) ||
442          MFI->hasVarSizedObjects() ||
443          MFI->isFrameAddressTaken() ||
444          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
445          MMI.callsUnwindInit());
446}
447
448bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
449  const MachineFrameInfo *MFI = MF.getFrameInfo();
450  return (RealignStack &&
451          !MFI->hasVarSizedObjects());
452}
453
454bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
455  const MachineFrameInfo *MFI = MF.getFrameInfo();
456  const Function *F = MF.getFunction();
457  bool requiresRealignment =
458    RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
459                     F->hasFnAttr(Attribute::StackAlignment));
460
461  // FIXME: Currently we don't support stack realignment for functions with
462  //        variable-sized allocas.
463  // FIXME: Temporary disable the error - it seems to be too conservative.
464  if (0 && requiresRealignment && MFI->hasVarSizedObjects())
465    report_fatal_error(
466      "Stack realignment in presense of dynamic allocas is not supported");
467
468  return (requiresRealignment && !MFI->hasVarSizedObjects());
469}
470
471bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
472  return !MF.getFrameInfo()->hasVarSizedObjects();
473}
474
475bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
476                                           int &FrameIdx) const {
477  if (Reg == FramePtr && hasFP(MF)) {
478    FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
479    return true;
480  }
481  return false;
482}
483
/// getFrameIndexOffset - Compute the displacement of frame index FI from the
/// base register that will be used to address it (stack or frame pointer).
int
X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Frame-object offsets are relative to the local area; rebase them.
  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (needsStackRealignment(MF)) {
    if (FI < 0) {
      // Fixed objects (negative indices) are addressed off the frame pointer.
      // Skip the saved EBP.
      Offset += SlotSize;
    } else {
      // Non-fixed objects are addressed off the realigned stack pointer.
      unsigned Align = MFI->getObjectAlignment(FI);
      assert((-(Offset + StackSize)) % Align == 0);
      // Align is referenced only by the assert above; clear it so NDEBUG
      // builds don't warn about an unused variable.
      Align = 0;
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      // No frame pointer: everything is addressed off the stack pointer.
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}
518
519static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
520  if (is64Bit) {
521    if (isInt<8>(Imm))
522      return X86::SUB64ri8;
523    return X86::SUB64ri32;
524  } else {
525    if (isInt<8>(Imm))
526      return X86::SUB32ri8;
527    return X86::SUB32ri;
528  }
529}
530
531static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
532  if (is64Bit) {
533    if (isInt<8>(Imm))
534      return X86::ADD64ri8;
535    return X86::ADD64ri32;
536  } else {
537    if (isInt<8>(Imm))
538      return X86::ADD32ri8;
539    return X86::ADD32ri;
540  }
541}
542
/// eliminateCallFramePseudoInstr - Lower the ADJCALLSTACKDOWN/ADJCALLSTACKUP
/// pseudo instruction at I.  When the call frame is not reserved in the
/// prologue, the pseudos become explicit SP adjustments; otherwise only
/// callee-popped bytes need compensating.  The pseudo itself is always erased.
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
    // adjcallstackdown instruction into 'add ESP, <amt>'
    // TODO: consider using push / pop instead of sub + store / add
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
        // Call-frame setup: allocate the (aligned) argument area.
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(getSUBriOpcode(Is64Bit, Amount)),
                      StackPtr)
          .addReg(StackPtr)
          .addImm(Amount);
      } else {
        assert(Old->getOpcode() == getCallFrameDestroyOpcode());

        // Factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;

      if (Amount) {
          unsigned Opc = getADDriOpcode(Is64Bit, Amount);
          New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
            .addReg(StackPtr)
            .addImm(Amount);
        }
      }

      if (New) {
        // The EFLAGS implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction.
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.  We do this until we have
    // more advanced stack pointer tracking ability.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
                StackPtr)
          .addReg(StackPtr)
          .addImm(CalleeAmt);

      // The EFLAGS implicit def is dead.
      New->getOperand(3).setIsDead();
      MBB.insert(I, New);
    }
  }

  // In every case the pseudo instruction itself is removed.
  MBB.erase(I);
}
610
/// eliminateFrameIndex - Rewrite the abstract frame-index operand of the
/// instruction at II into a concrete base register (frame or stack pointer)
/// plus a displacement folded into the memory operand's offset field.
unsigned
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, FrameIndexValue *Value,
                                     RegScavenger *RS) const{
  assert(SPAdj == 0 && "Unexpected");

  unsigned i = 0;
  MachineInstr &MI = *II;
  MachineFunction &MF = *MI.getParent()->getParent();

  // Locate the frame-index operand within the instruction.
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
  }

  int FrameIndex = MI.getOperand(i).getIndex();
  unsigned BasePtr;

  // Tail-call jumps through memory execute after the frame pointer has been
  // popped, so they must always address off the stack pointer.
  unsigned Opc = MI.getOpcode();
  bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
  if (needsStackRealignment(MF))
    // With realignment, fixed objects (negative indices) use FP; the rest SP.
    BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
  else if (AfterFPPop)
    BasePtr = StackPtr;
  else
    BasePtr = (hasFP(MF) ? FramePtr : StackPtr);

  // This must be part of a four operand memory reference.  Replace the
  // FrameIndex with base register with EBP.  Add an offset to the offset.
  MI.getOperand(i).ChangeToRegister(BasePtr, false);

  // Now add the frame object offset to the offset from EBP.
  int FIOffset;
  if (AfterFPPop) {
    // Tail call jmp happens after FP is popped.
    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
  } else
    FIOffset = getFrameIndexOffset(MF, FrameIndex);

  if (MI.getOperand(i+3).isImm()) {
    // Offset is a 32-bit integer.
    int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
    MI.getOperand(i + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
    MI.getOperand(i+3).setOffset(Offset);
  }
  return 0;
}
663
/// processFunctionBeforeCalleeSavedScan - Create the fixed frame objects that
/// must exist before callee-saved registers are assigned slots: the tail-call
/// return-address move area (if any) and the frame-pointer save slot.
void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                      RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           (-1U*SlotSize)+TailCallReturnAddrDelta,
                           true, false);
  }

  if (hasFP(MF)) {
    assert((TailCallReturnAddrDelta <= 0) &&
           "The Delta should always be zero or negative");
    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();

    // Create a frame entry for the EBP register that must be saved.
    // hasReservedSpillSlot relies on this being the first fixed object.
    int FrameIdx = MFI->CreateFixedObject(SlotSize,
                                          -(int)SlotSize +
                                          TFI.getOffsetOfLocalArea() +
                                          TailCallReturnAddrDelta,
                                          true, false);
    assert(FrameIdx == MFI->getObjectIndexBegin() &&
           "Slot for EBP register must be last in order to be found!");
    // FrameIdx is referenced only by the assert above; clear it to silence
    // an unused-variable warning in NDEBUG builds.
    FrameIdx = 0;
  }
}
703
704/// emitSPUpdate - Emit a series of instructions to increment / decrement the
705/// stack pointer by a constant value.
706static
707void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
708                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
709                  const TargetInstrInfo &TII) {
710  bool isSub = NumBytes < 0;
711  uint64_t Offset = isSub ? -NumBytes : NumBytes;
712  unsigned Opc = isSub ?
713    getSUBriOpcode(Is64Bit, Offset) :
714    getADDriOpcode(Is64Bit, Offset);
715  uint64_t Chunk = (1LL << 31) - 1;
716  DebugLoc DL = MBB.findDebugLoc(MBBI);
717
718  while (Offset) {
719    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
720    MachineInstr *MI =
721      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
722        .addReg(StackPtr)
723        .addImm(ThisVal);
724    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
725    Offset -= ThisVal;
726  }
727}
728
/// mergeSPUpdatesUp - If the instruction immediately before MBBI is an
/// ADD/SUB of an immediate into the stack pointer, fold its adjustment into
/// *NumBytes (when non-null) and erase it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // Nothing precedes MBBI; no instruction to merge.
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    // An ADD increases the pending adjustment.
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    // A SUB decreases the pending adjustment.
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}
751
/// mergeSPUpdatesDown - If the instruction after MBBI is an ADD/SUB of an
/// immediate into the stack pointer, fold its adjustment into *NumBytes
/// (when non-null) and erase it.  (The original header comment misnamed
/// this function mergeSPUpdatesUp.)
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  // The early return below deliberately disables this optimization; the
  // remainder of the body is kept for reference.
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    // Note the sign flip relative to mergeSPUpdatesUp: a later ADD cancels
    // part of the adjustment being accumulated.
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}
782
/// mergeSPUpdates - Check the instruction before (doMergeWithPrevious) or at
/// MBBI.  If it is a stack-pointer ADD/SUB with an immediate, it is erased
/// and its stack adjustment is returned: positive for an ADD, negative for a
/// SUB; zero if nothing was merged.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI,
                           unsigned StackPtr,
                           bool doMergeWithPrevious) {
  // No candidate instruction in the requested direction.
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  // PI is the instruction inspected; NI (only valid when merging forward) is
  // where MBBI is re-anchored after PI is erased.
  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr){
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}
816
/// emitCalleeSavedFrameMoves - Append DWARF machine-move entries describing
/// where each callee-saved register was spilled, all attached to the given
/// Label. FramePtr is the register currently serving as the frame pointer,
/// used to suppress a duplicate move for the frame pointer's own spill.
void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                MCSymbol *Label,
                                                unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate amount of bytes used for return address storing.
  int stackGrowth =
    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
     TargetFrameInfo::StackGrowsUp ?
     TD->getPointerSize() : -TD->getPointerSize());

  // FIXME: This is dirty hack. The code itself is pretty mess right now.
  // It should be rewritten from scratch and generalized sometimes.

  // Determine maximum offset (minumum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  // saveAreaOffset skips the return address slot, and the saved frame
  // pointer slot as well when a frame pointer is in use.
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    // Re-express the object offset relative to the save area.
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    //        another bug. I.e., one where we generate a prolog like this:
    //
    //          pushl  %ebp
    //          movl   %esp, %ebp
    //          pushl  %ebp
    //          pushl  %esi
    //           ...
    //
    //        The immediate re-push of EBP is unnecessary. At the least, it's an
    //        optimization bug. EBP can be used as a scratch register in certain
    //        cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    // Record "register Reg is saved at Offset from the virtual frame base".
    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
  }
}
883
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // DWARF frame moves are needed for debug info and whenever the function
  // may unwind (i.e. it is not known to be nothrow) or tables are forced.
  bool needsFrameMoves = MMI.hasDebugInfo() ||
                          !Fn->doesNotThrow() || UnwindTablesMandatory;
  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  DebugLoc DL;

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone).
  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
      !needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->adjustsStack() &&                      // No calls.
      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    // Shrink the allocation by up to 128 bytes (the Red Zone), but never
    // below what the callee-saved area itself requires.
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  } else if (Subtarget->isTargetWin64()) {
    // We need to always allocate 32 bytes as register spill area.
    // FIXME: We might reuse these 32 bytes for leaf functions.
    StackSize += 32;
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the callers.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  uint64_t NumBytes = 0;
  // Stack grows down, so each slot is one pointer-size below the last.
  int stackGrowth = -TD->getPointerSize();

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      // Round the frame size up to the requested alignment.
      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill);

    if (needsFrameMoves) {
      // Mark the place where EBP/RBP was saved.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      } else {
        // FIXME: Verify & implement for FP
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Update EBP with the new base value...
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
        .addReg(StackPtr);

    if (needsFrameMoves) {
      // Mark effective beginning of when frame pointer becomes valid.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign stack
    if (needsStackRealignment(MF)) {
      // AND the stack pointer with -MaxAlign to clear the low bits.
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
                StackPtr).addReg(StackPtr).addImm(-MaxAlign);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    ++MBBI;

    if (!HasFP && needsFrameMoves) {
      // Mark callee-saved push instruction.
      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);

      // Define the current CFA rule to use the provided offset.
      unsigned Ptr = StackSize ?
        MachineLocation::VirtualFP : StackPtr;
      MachineLocation SPDst(Ptr);
      MachineLocation SPSrc(Ptr, StackOffset);
      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      StackOffset += stackGrowth;
    }
  }

  DL = MBB.findDebugLoc(MBBI);

  // Adjust stack pointer: ESP -= numbytes.
  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
    // Check, whether EAX is livein for this function.
    bool isEAXAlive = false;
    for (MachineRegisterInfo::livein_iterator
           II = MF.getRegInfo().livein_begin(),
           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
      unsigned Reg = II->first;
      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
                    Reg == X86::AH || Reg == X86::AL);
    }

    // Function prologue calls _alloca to probe the stack when allocating more
    // than 4k bytes in one go. Touching the stack at 4K increments is necessary
    // to ensure that the guard pages used by the OS virtual memory manager are
    // allocated in correct sequence.
    if (!isEAXAlive) {
      // _alloca takes the allocation size in EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca")
        .addReg(StackPtr, RegState::Define | RegState::Implicit);
    } else {
      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill);

      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
      // allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes - 4);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca")
        .addReg(StackPtr, RegState::Define | RegState::Implicit);

      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    // If there is an SUB32ri of ESP immediately before this instruction, merge
    // the two. This can be the case when tail call elimination is enabled and
    // the callee has more arguments then the caller.
    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

    // If there is an ADD32ri or SUB32ri of ESP immediately after this
    // instruction, merge the two instructions.
    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

    if (NumBytes)
      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
  }

  if ((NumBytes || PushedRegs) && needsFrameMoves) {
    // Mark end of stack pointer adjustment.
    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);

    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              -StackSize + stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      } else {
        // FIXME: Verify & implement for FP
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      }
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
  }
}
1143
/// emitEpilogue - Undo the effects of emitPrologue before the return in MBB:
/// restore the frame pointer, pop callee-saved registers, deallocate the
/// local frame, and lower the EH_RETURN / TCRETURN pseudo instructions.
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign  = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    // Mirrors the computation in emitPrologue.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  // LastCSPop stays at the first instruction after the pops (the original
  // MBBI position); MBBI is walked backwards over the pops/terminators.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r &&
        !PI->getDesc().isTerminator())
      break;

    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned.
  if (needsStackRealignment(MF)) {
    // We cannot use LEA here, because stack pointer was realigned. We need to
    // deallocate local frame back.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
      MBBI = prior(LastCSPop);
    }

    // Restore ESP/RSP from the frame pointer.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      // Point ESP at the bottom of the callee-saved area: EBP - CSSize.
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI =
        addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                        FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else {
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
  }

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = prior(MBB.end());
    MachineOperand &DestAddr  = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = prior(MBB.end());
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    // Memory-form TCRETURN carries a 5-operand memory reference first.
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incoporate the retaddr area.
    Offset = StackAdj-MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceeding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                         JumpTarget.getTargetFlags());
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      // Copy the full 5-operand memory reference onto the tail jump.
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
    }

    // Propagate the remaining (e.g. implicit register) operands of the
    // pseudo onto the real tail jump just emitted.
    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = prior(MBB.end());

    // Check for possible merge with preceeding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
  }
}
1310
1311unsigned X86RegisterInfo::getRARegister() const {
1312  return Is64Bit ? X86::RIP     // Should have dwarf #16.
1313                 : X86::EIP;    // Should have dwarf #8.
1314}
1315
1316unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
1317  return hasFP(MF) ? FramePtr : StackPtr;
1318}
1319
1320void
1321X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
1322  // Calculate amount of bytes used for return address storing
1323  int stackGrowth = (Is64Bit ? -8 : -4);
1324
1325  // Initial state of the frame pointer is esp+stackGrowth.
1326  MachineLocation Dst(MachineLocation::VirtualFP);
1327  MachineLocation Src(StackPtr, stackGrowth);
1328  Moves.push_back(MachineMove(0, Dst, Src));
1329
1330  // Add return address to move list
1331  MachineLocation CSDst(StackPtr, stackGrowth);
1332  MachineLocation CSSrc(getRARegister());
1333  Moves.push_back(MachineMove(0, CSDst, CSSrc));
1334}
1335
/// getEHExceptionRegister - Not implemented for X86; always aborts.
/// The return statement only exists to satisfy the signature.
unsigned X86RegisterInfo::getEHExceptionRegister() const {
  llvm_unreachable("What is the exception register");
  return 0;
}
1340
/// getEHHandlerRegister - Not implemented for X86; always aborts.
/// The return statement only exists to satisfy the signature.
unsigned X86RegisterInfo::getEHHandlerRegister() const {
  llvm_unreachable("What is the exception handler register");
  return 0;
}
1345
namespace llvm {
/// getX86SubSuperRegister - Map Reg to the sub- or super-register of the same
/// register family whose size matches VT. For i8, High selects the AH-style
/// high-byte register (only defined for A/B/C/D); otherwise the low byte is
/// returned. Unknown registers are returned unchanged (or 0 for i8, where no
/// byte register may exist).
unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
  switch (VT.getSimpleVT().SimpleTy) {
  default: return Reg;
  case MVT::i8:
    // 8-bit registers: pick high byte (AH..BH) or low byte (AL..R15B).
    if (High) {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AH;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DH;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CH;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BH;
      }
    } else {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AL;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DL;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CL;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BL;
      case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
        return X86::SIL;
      case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
        return X86::DIL;
      case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
        return X86::BPL;
      case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
        return X86::SPL;
      case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
        return X86::R8B;
      case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
        return X86::R9B;
      case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
        return X86::R10B;
      case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
        return X86::R11B;
      case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
        return X86::R12B;
      case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
        return X86::R13B;
      case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
        return X86::R14B;
      case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
        return X86::R15B;
      }
    }
    // Both branches above return on every path, so control never reaches
    // the next case label.
  case MVT::i16:
    // 16-bit registers.
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::AX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::DX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::CX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::BX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::SI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::DI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::BP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::SP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8W;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9W;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10W;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11W;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12W;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13W;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14W;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15W;
    }
  case MVT::i32:
    // 32-bit registers.
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::EAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::EDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::ECX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::EBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::ESI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::EDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::EBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::ESP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8D;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9D;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10D;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11D;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12D;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13D;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14D;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15D;
    }
  case MVT::i64:
    // 64-bit registers.
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::RAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::RDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::RCX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::RBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::RSI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::RDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::RBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::RSP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15;
    }
  }

  return Reg;
}
}
1513
1514#include "X86GenRegisterInfo.inc"
1515
1516namespace {
1517  struct MSAH : public MachineFunctionPass {
1518    static char ID;
1519    MSAH() : MachineFunctionPass(&ID) {}
1520
1521    virtual bool runOnMachineFunction(MachineFunction &MF) {
1522      const X86TargetMachine *TM =
1523        static_cast<const X86TargetMachine *>(&MF.getTarget());
1524      const X86RegisterInfo *X86RI = TM->getRegisterInfo();
1525      MachineRegisterInfo &RI = MF.getRegInfo();
1526      X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1527      unsigned StackAlignment = X86RI->getStackAlignment();
1528
1529      // Be over-conservative: scan over all vreg defs and find whether vector
1530      // registers are used. If yes, there is a possibility that vector register
1531      // will be spilled and thus require dynamic stack realignment.
1532      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
1533           RegNum < RI.getLastVirtReg(); ++RegNum)
1534        if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
1535          FuncInfo->setReserveFP(true);
1536          return true;
1537        }
1538
1539      // Nothing to do
1540      return false;
1541    }
1542
1543    virtual const char *getPassName() const {
1544      return "X86 Maximal Stack Alignment Check";
1545    }
1546
1547    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
1548      AU.setPreservesCFG();
1549      MachineFunctionPass::getAnalysisUsage(AU);
1550    }
1551  };
1552
1553  char MSAH::ID = 0;
1554}
1555
1556FunctionPass*
1557llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
1558