X86RegisterInfo.cpp revision 7431beaba2a01c3fe299c861b2ec85cbf1dc81c4
1//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the X86 implementation of the TargetRegisterInfo class.
11// This file is responsible for the frame pointer elimination optimization
12// on X86.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86RegisterInfo.h"
18#include "X86InstrBuilder.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/Constants.h"
23#include "llvm/Function.h"
24#include "llvm/Type.h"
25#include "llvm/CodeGen/ValueTypes.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineLocation.h"
31#include "llvm/CodeGen/MachineModuleInfo.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/Target/TargetFrameInfo.h"
35#include "llvm/Target/TargetInstrInfo.h"
36#include "llvm/Target/TargetMachine.h"
37#include "llvm/Target/TargetOptions.h"
38#include "llvm/ADT/BitVector.h"
39#include "llvm/ADT/STLExtras.h"
40#include "llvm/Support/ErrorHandling.h"
41using namespace llvm;
42
43X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
44                                 const TargetInstrInfo &tii)
45  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
46                         X86::ADJCALLSTACKDOWN64 :
47                         X86::ADJCALLSTACKDOWN32,
48                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
49                         X86::ADJCALLSTACKUP64 :
50                         X86::ADJCALLSTACKUP32),
51    TM(tm), TII(tii) {
52  // Cache some information.
53  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
54  Is64Bit = Subtarget->is64Bit();
55  IsWin64 = Subtarget->isTargetWin64();
56  StackAlign = TM.getFrameInfo()->getStackAlignment();
57
58  if (Is64Bit) {
59    SlotSize = 8;
60    StackPtr = X86::RSP;
61    FramePtr = X86::RBP;
62  } else {
63    SlotSize = 4;
64    StackPtr = X86::ESP;
65    FramePtr = X86::EBP;
66  }
67}
68
69/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
70/// specific numbering, used in debug info and exception tables.
71int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
72  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
73  unsigned Flavour = DWARFFlavour::X86_64;
74
75  if (!Subtarget->is64Bit()) {
76    if (Subtarget->isTargetDarwin()) {
77      if (isEH)
78        Flavour = DWARFFlavour::X86_32_DarwinEH;
79      else
80        Flavour = DWARFFlavour::X86_32_Generic;
81    } else if (Subtarget->isTargetCygMing()) {
82      // Unsupported by now, just quick fallback
83      Flavour = DWARFFlavour::X86_32_Generic;
84    } else {
85      Flavour = DWARFFlavour::X86_32_Generic;
86    }
87  }
88
89  return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
90}
91
92/// getX86RegNum - This function maps LLVM register identifiers to their X86
93/// specific numbering, which is used in various places encoding instructions.
94unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
95  switch(RegNo) {
96  case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
97  case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
98  case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
99  case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
100  case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
101    return N86::ESP;
102  case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
103    return N86::EBP;
104  case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
105    return N86::ESI;
106  case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
107    return N86::EDI;
108
109  case X86::R8:  case X86::R8D:  case X86::R8W:  case X86::R8B:
110    return N86::EAX;
111  case X86::R9:  case X86::R9D:  case X86::R9W:  case X86::R9B:
112    return N86::ECX;
113  case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
114    return N86::EDX;
115  case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
116    return N86::EBX;
117  case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
118    return N86::ESP;
119  case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
120    return N86::EBP;
121  case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
122    return N86::ESI;
123  case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
124    return N86::EDI;
125
126  case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
127  case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
128    return RegNo-X86::ST0;
129
130  case X86::XMM0: case X86::XMM8:
131  case X86::YMM0: case X86::YMM8: case X86::MM0:
132    return 0;
133  case X86::XMM1: case X86::XMM9:
134  case X86::YMM1: case X86::YMM9: case X86::MM1:
135    return 1;
136  case X86::XMM2: case X86::XMM10:
137  case X86::YMM2: case X86::YMM10: case X86::MM2:
138    return 2;
139  case X86::XMM3: case X86::XMM11:
140  case X86::YMM3: case X86::YMM11: case X86::MM3:
141    return 3;
142  case X86::XMM4: case X86::XMM12:
143  case X86::YMM4: case X86::YMM12: case X86::MM4:
144    return 4;
145  case X86::XMM5: case X86::XMM13:
146  case X86::YMM5: case X86::YMM13: case X86::MM5:
147    return 5;
148  case X86::XMM6: case X86::XMM14:
149  case X86::YMM6: case X86::YMM14: case X86::MM6:
150    return 6;
151  case X86::XMM7: case X86::XMM15:
152  case X86::YMM7: case X86::YMM15: case X86::MM7:
153    return 7;
154
155  case X86::ES:
156    return 0;
157  case X86::CS:
158    return 1;
159  case X86::SS:
160    return 2;
161  case X86::DS:
162    return 3;
163  case X86::FS:
164    return 4;
165  case X86::GS:
166    return 5;
167
168  case X86::CR0:
169    return 0;
170  case X86::CR1:
171    return 1;
172  case X86::CR2:
173    return 2;
174  case X86::CR3:
175    return 3;
176  case X86::CR4:
177    return 4;
178
179  case X86::DR0:
180    return 0;
181  case X86::DR1:
182    return 1;
183  case X86::DR2:
184    return 2;
185  case X86::DR3:
186    return 3;
187  case X86::DR4:
188    return 4;
189  case X86::DR5:
190    return 5;
191  case X86::DR6:
192    return 6;
193  case X86::DR7:
194    return 7;
195
196  default:
197    assert(isVirtualRegister(RegNo) && "Unknown physical register!");
198    llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
199    return 0;
200  }
201}
202
203const TargetRegisterClass *
204X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
205                                          const TargetRegisterClass *B,
206                                          unsigned SubIdx) const {
207  switch (SubIdx) {
208  default: return 0;
209  case X86::sub_8bit:
210    if (B == &X86::GR8RegClass) {
211      if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
212        return A;
213    } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
214      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
215          A == &X86::GR64_NOREXRegClass ||
216          A == &X86::GR64_NOSPRegClass ||
217          A == &X86::GR64_NOREX_NOSPRegClass)
218        return &X86::GR64_ABCDRegClass;
219      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
220               A == &X86::GR32_NOREXRegClass ||
221               A == &X86::GR32_NOSPRegClass)
222        return &X86::GR32_ABCDRegClass;
223      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
224               A == &X86::GR16_NOREXRegClass)
225        return &X86::GR16_ABCDRegClass;
226    } else if (B == &X86::GR8_NOREXRegClass) {
227      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
228          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
229        return &X86::GR64_NOREXRegClass;
230      else if (A == &X86::GR64_ABCDRegClass)
231        return &X86::GR64_ABCDRegClass;
232      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
233               A == &X86::GR32_NOSPRegClass)
234        return &X86::GR32_NOREXRegClass;
235      else if (A == &X86::GR32_ABCDRegClass)
236        return &X86::GR32_ABCDRegClass;
237      else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
238        return &X86::GR16_NOREXRegClass;
239      else if (A == &X86::GR16_ABCDRegClass)
240        return &X86::GR16_ABCDRegClass;
241    }
242    break;
243  case X86::sub_8bit_hi:
244    if (B == &X86::GR8_ABCD_HRegClass) {
245      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
246          A == &X86::GR64_NOREXRegClass ||
247          A == &X86::GR64_NOSPRegClass ||
248          A == &X86::GR64_NOREX_NOSPRegClass)
249        return &X86::GR64_ABCDRegClass;
250      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
251               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
252        return &X86::GR32_ABCDRegClass;
253      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
254               A == &X86::GR16_NOREXRegClass)
255        return &X86::GR16_ABCDRegClass;
256    }
257    break;
258  case X86::sub_16bit:
259    if (B == &X86::GR16RegClass) {
260      if (A->getSize() == 4 || A->getSize() == 8)
261        return A;
262    } else if (B == &X86::GR16_ABCDRegClass) {
263      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
264          A == &X86::GR64_NOREXRegClass ||
265          A == &X86::GR64_NOSPRegClass ||
266          A == &X86::GR64_NOREX_NOSPRegClass)
267        return &X86::GR64_ABCDRegClass;
268      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
269               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
270        return &X86::GR32_ABCDRegClass;
271    } else if (B == &X86::GR16_NOREXRegClass) {
272      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
273          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
274        return &X86::GR64_NOREXRegClass;
275      else if (A == &X86::GR64_ABCDRegClass)
276        return &X86::GR64_ABCDRegClass;
277      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
278               A == &X86::GR32_NOSPRegClass)
279        return &X86::GR32_NOREXRegClass;
280      else if (A == &X86::GR32_ABCDRegClass)
281        return &X86::GR64_ABCDRegClass;
282    }
283    break;
284  case X86::sub_32bit:
285    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
286      if (A->getSize() == 8)
287        return A;
288    } else if (B == &X86::GR32_ABCDRegClass) {
289      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
290          A == &X86::GR64_NOREXRegClass ||
291          A == &X86::GR64_NOSPRegClass ||
292          A == &X86::GR64_NOREX_NOSPRegClass)
293        return &X86::GR64_ABCDRegClass;
294    } else if (B == &X86::GR32_NOREXRegClass) {
295      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
296          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
297        return &X86::GR64_NOREXRegClass;
298      else if (A == &X86::GR64_ABCDRegClass)
299        return &X86::GR64_ABCDRegClass;
300    }
301    break;
302  case X86::sub_ss:
303    if (B == &X86::FR32RegClass)
304      return A;
305    break;
306  case X86::sub_sd:
307    if (B == &X86::FR64RegClass)
308      return A;
309    break;
310  case X86::sub_xmm:
311    if (B == &X86::VR128RegClass)
312      return A;
313    break;
314  }
315  return 0;
316}
317
318const TargetRegisterClass *
319X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
320  switch (Kind) {
321  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
322  case 0: // Normal GPRs.
323    if (TM.getSubtarget<X86Subtarget>().is64Bit())
324      return &X86::GR64RegClass;
325    return &X86::GR32RegClass;
326  case 1: // Normal GRPs except the stack pointer (for encoding reasons).
327    if (TM.getSubtarget<X86Subtarget>().is64Bit())
328      return &X86::GR64_NOSPRegClass;
329    return &X86::GR32_NOSPRegClass;
330  }
331}
332
333const TargetRegisterClass *
334X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
335  if (RC == &X86::CCRRegClass) {
336    if (Is64Bit)
337      return &X86::GR64RegClass;
338    else
339      return &X86::GR32RegClass;
340  }
341  return NULL;
342}
343
344const unsigned *
345X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
346  bool callsEHReturn = false;
347  bool ghcCall = false;
348
349  if (MF) {
350    callsEHReturn = MF->getMMI().callsEHReturn();
351    const Function *F = MF->getFunction();
352    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
353  }
354
355  static const unsigned GhcCalleeSavedRegs[] = {
356    0
357  };
358
359  static const unsigned CalleeSavedRegs32Bit[] = {
360    X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
361  };
362
363  static const unsigned CalleeSavedRegs32EHRet[] = {
364    X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
365  };
366
367  static const unsigned CalleeSavedRegs64Bit[] = {
368    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
369  };
370
371  static const unsigned CalleeSavedRegs64EHRet[] = {
372    X86::RAX, X86::RDX, X86::RBX, X86::R12,
373    X86::R13, X86::R14, X86::R15, X86::RBP, 0
374  };
375
376  static const unsigned CalleeSavedRegsWin64[] = {
377    X86::RBX,   X86::RBP,   X86::RDI,   X86::RSI,
378    X86::R12,   X86::R13,   X86::R14,   X86::R15,
379    X86::XMM6,  X86::XMM7,  X86::XMM8,  X86::XMM9,
380    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
381    X86::XMM14, X86::XMM15, 0
382  };
383
384  if (ghcCall) {
385    return GhcCalleeSavedRegs;
386  } else if (Is64Bit) {
387    if (IsWin64)
388      return CalleeSavedRegsWin64;
389    else
390      return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
391  } else {
392    return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
393  }
394}
395
396BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
397  BitVector Reserved(getNumRegs());
398  // Set the stack-pointer register and its aliases as reserved.
399  Reserved.set(X86::RSP);
400  Reserved.set(X86::ESP);
401  Reserved.set(X86::SP);
402  Reserved.set(X86::SPL);
403
404  // Set the instruction pointer register and its aliases as reserved.
405  Reserved.set(X86::RIP);
406  Reserved.set(X86::EIP);
407  Reserved.set(X86::IP);
408
409  // Set the frame-pointer register and its aliases as reserved if needed.
410  if (hasFP(MF)) {
411    Reserved.set(X86::RBP);
412    Reserved.set(X86::EBP);
413    Reserved.set(X86::BP);
414    Reserved.set(X86::BPL);
415  }
416
417  // Mark the x87 stack registers as reserved, since they don't behave normally
418  // with respect to liveness. We don't fully model the effects of x87 stack
419  // pushes and pops after stackification.
420  Reserved.set(X86::ST0);
421  Reserved.set(X86::ST1);
422  Reserved.set(X86::ST2);
423  Reserved.set(X86::ST3);
424  Reserved.set(X86::ST4);
425  Reserved.set(X86::ST5);
426  Reserved.set(X86::ST6);
427  Reserved.set(X86::ST7);
428  return Reserved;
429}
430
431//===----------------------------------------------------------------------===//
432// Stack Frame Processing methods
433//===----------------------------------------------------------------------===//
434
435/// hasFP - Return true if the specified function should have a dedicated frame
436/// pointer register.  This is true if the function has variable sized allocas
437/// or if frame pointer elimination is disabled.
438bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
439  const MachineFrameInfo *MFI = MF.getFrameInfo();
440  const MachineModuleInfo &MMI = MF.getMMI();
441
442  return (DisableFramePointerElim(MF) ||
443          needsStackRealignment(MF) ||
444          MFI->hasVarSizedObjects() ||
445          MFI->isFrameAddressTaken() ||
446          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
447          MMI.callsUnwindInit());
448}
449
450bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
451  const MachineFrameInfo *MFI = MF.getFrameInfo();
452  return (RealignStack &&
453          !MFI->hasVarSizedObjects());
454}
455
456bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
457  const MachineFrameInfo *MFI = MF.getFrameInfo();
458  const Function *F = MF.getFunction();
459  bool requiresRealignment =
460    RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
461                     F->hasFnAttr(Attribute::StackAlignment));
462
463  // FIXME: Currently we don't support stack realignment for functions with
464  //        variable-sized allocas.
465  // FIXME: Temporary disable the error - it seems to be too conservative.
466  if (0 && requiresRealignment && MFI->hasVarSizedObjects())
467    report_fatal_error(
468      "Stack realignment in presense of dynamic allocas is not supported");
469
470  return (requiresRealignment && !MFI->hasVarSizedObjects());
471}
472
473bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
474  return !MF.getFrameInfo()->hasVarSizedObjects();
475}
476
477bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
478                                           int &FrameIdx) const {
479  if (Reg == FramePtr && hasFP(MF)) {
480    FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
481    return true;
482  }
483  return false;
484}
485
486int
487X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
488  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
489  const MachineFrameInfo *MFI = MF.getFrameInfo();
490  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
491  uint64_t StackSize = MFI->getStackSize();
492
493  if (needsStackRealignment(MF)) {
494    if (FI < 0) {
495      // Skip the saved EBP.
496      Offset += SlotSize;
497    } else {
498      unsigned Align = MFI->getObjectAlignment(FI);
499      assert((-(Offset + StackSize)) % Align == 0);
500      Align = 0;
501      return Offset + StackSize;
502    }
503    // FIXME: Support tail calls
504  } else {
505    if (!hasFP(MF))
506      return Offset + StackSize;
507
508    // Skip the saved EBP.
509    Offset += SlotSize;
510
511    // Skip the RETADDR move area
512    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
513    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
514    if (TailCallReturnAddrDelta < 0)
515      Offset -= TailCallReturnAddrDelta;
516  }
517
518  return Offset;
519}
520
521static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
522  if (is64Bit) {
523    if (isInt<8>(Imm))
524      return X86::SUB64ri8;
525    return X86::SUB64ri32;
526  } else {
527    if (isInt<8>(Imm))
528      return X86::SUB32ri8;
529    return X86::SUB32ri;
530  }
531}
532
533static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
534  if (is64Bit) {
535    if (isInt<8>(Imm))
536      return X86::ADD64ri8;
537    return X86::ADD64ri32;
538  } else {
539    if (isInt<8>(Imm))
540      return X86::ADD32ri8;
541    return X86::ADD32ri;
542  }
543}
544
545void X86RegisterInfo::
546eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
547                              MachineBasicBlock::iterator I) const {
548  if (!hasReservedCallFrame(MF)) {
549    // If the stack pointer can be changed after prologue, turn the
550    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
551    // adjcallstackdown instruction into 'add ESP, <amt>'
552    // TODO: consider using push / pop instead of sub + store / add
553    MachineInstr *Old = I;
554    uint64_t Amount = Old->getOperand(0).getImm();
555    if (Amount != 0) {
556      // We need to keep the stack aligned properly.  To do this, we round the
557      // amount of space needed for the outgoing arguments up to the next
558      // alignment boundary.
559      Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
560
561      MachineInstr *New = 0;
562      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
563        New = BuildMI(MF, Old->getDebugLoc(),
564                      TII.get(getSUBriOpcode(Is64Bit, Amount)),
565                      StackPtr)
566          .addReg(StackPtr)
567          .addImm(Amount);
568      } else {
569        assert(Old->getOpcode() == getCallFrameDestroyOpcode());
570
571        // Factor out the amount the callee already popped.
572        uint64_t CalleeAmt = Old->getOperand(1).getImm();
573        Amount -= CalleeAmt;
574
575      if (Amount) {
576          unsigned Opc = getADDriOpcode(Is64Bit, Amount);
577          New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
578            .addReg(StackPtr)
579            .addImm(Amount);
580        }
581      }
582
583      if (New) {
584        // The EFLAGS implicit def is dead.
585        New->getOperand(3).setIsDead();
586
587        // Replace the pseudo instruction with a new instruction.
588        MBB.insert(I, New);
589      }
590    }
591  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
592    // If we are performing frame pointer elimination and if the callee pops
593    // something off the stack pointer, add it back.  We do this until we have
594    // more advanced stack pointer tracking ability.
595    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
596      unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
597      MachineInstr *Old = I;
598      MachineInstr *New =
599        BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
600                StackPtr)
601          .addReg(StackPtr)
602          .addImm(CalleeAmt);
603
604      // The EFLAGS implicit def is dead.
605      New->getOperand(3).setIsDead();
606      MBB.insert(I, New);
607    }
608  }
609
610  MBB.erase(I);
611}
612
613unsigned
614X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
615                                     int SPAdj, FrameIndexValue *Value,
616                                     RegScavenger *RS) const{
617  assert(SPAdj == 0 && "Unexpected");
618
619  unsigned i = 0;
620  MachineInstr &MI = *II;
621  MachineFunction &MF = *MI.getParent()->getParent();
622
623  while (!MI.getOperand(i).isFI()) {
624    ++i;
625    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
626  }
627
628  int FrameIndex = MI.getOperand(i).getIndex();
629  unsigned BasePtr;
630
631  unsigned Opc = MI.getOpcode();
632  bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
633  if (needsStackRealignment(MF))
634    BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
635  else if (AfterFPPop)
636    BasePtr = StackPtr;
637  else
638    BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
639
640  // This must be part of a four operand memory reference.  Replace the
641  // FrameIndex with base register with EBP.  Add an offset to the offset.
642  MI.getOperand(i).ChangeToRegister(BasePtr, false);
643
644  // Now add the frame object offset to the offset from EBP.
645  int FIOffset;
646  if (AfterFPPop) {
647    // Tail call jmp happens after FP is popped.
648    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
649    const MachineFrameInfo *MFI = MF.getFrameInfo();
650    FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
651  } else
652    FIOffset = getFrameIndexOffset(MF, FrameIndex);
653
654  if (MI.getOperand(i+3).isImm()) {
655    // Offset is a 32-bit integer.
656    int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
657    MI.getOperand(i + 3).ChangeToImmediate(Offset);
658  } else {
659    // Offset is symbolic. This is extremely rare.
660    uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
661    MI.getOperand(i+3).setOffset(Offset);
662  }
663  return 0;
664}
665
666void
667X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
668                                                      RegScavenger *RS) const {
669  MachineFrameInfo *MFI = MF.getFrameInfo();
670
671  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
672  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
673
674  if (TailCallReturnAddrDelta < 0) {
675    // create RETURNADDR area
676    //   arg
677    //   arg
678    //   RETADDR
679    //   { ...
680    //     RETADDR area
681    //     ...
682    //   }
683    //   [EBP]
684    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
685                           (-1U*SlotSize)+TailCallReturnAddrDelta, true);
686  }
687
688  if (hasFP(MF)) {
689    assert((TailCallReturnAddrDelta <= 0) &&
690           "The Delta should always be zero or negative");
691    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
692
693    // Create a frame entry for the EBP register that must be saved.
694    int FrameIdx = MFI->CreateFixedObject(SlotSize,
695                                          -(int)SlotSize +
696                                          TFI.getOffsetOfLocalArea() +
697                                          TailCallReturnAddrDelta,
698                                          true);
699    assert(FrameIdx == MFI->getObjectIndexBegin() &&
700           "Slot for EBP register must be last in order to be found!");
701    FrameIdx = 0;
702  }
703}
704
705/// emitSPUpdate - Emit a series of instructions to increment / decrement the
706/// stack pointer by a constant value.
707static
708void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
709                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
710                  const TargetInstrInfo &TII) {
711  bool isSub = NumBytes < 0;
712  uint64_t Offset = isSub ? -NumBytes : NumBytes;
713  unsigned Opc = isSub ?
714    getSUBriOpcode(Is64Bit, Offset) :
715    getADDriOpcode(Is64Bit, Offset);
716  uint64_t Chunk = (1LL << 31) - 1;
717  DebugLoc DL = MBB.findDebugLoc(MBBI);
718
719  while (Offset) {
720    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
721    MachineInstr *MI =
722      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
723        .addReg(StackPtr)
724        .addImm(ThisVal);
725    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
726    Offset -= ThisVal;
727  }
728}
729
730/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
731static
732void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
733                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
734  if (MBBI == MBB.begin()) return;
735
736  MachineBasicBlock::iterator PI = prior(MBBI);
737  unsigned Opc = PI->getOpcode();
738  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
739       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
740      PI->getOperand(0).getReg() == StackPtr) {
741    if (NumBytes)
742      *NumBytes += PI->getOperand(2).getImm();
743    MBB.erase(PI);
744  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
745              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
746             PI->getOperand(0).getReg() == StackPtr) {
747    if (NumBytes)
748      *NumBytes -= PI->getOperand(2).getImm();
749    MBB.erase(PI);
750  }
751}
752
/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower
/// iterator: meant to fold an immediate ADD/SUB on StackPtr that follows MBBI
/// into *NumBytes and erase it.
///
/// This function is currently disabled: it returns immediately and has no
/// effect.  Everything after the first `return` is unreachable.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  return;

  // ---- Unreachable below this point. ----
  // NOTE(review): if this code is ever re-enabled, both branches assign the
  // just-erased iterator NI to MBBI -- confirm and fix before enabling.

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    // A following ADD on the stack pointer reduces the pending amount.
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    // A following SUB on the stack pointer increases the pending amount.
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}
783
784/// mergeSPUpdates - Checks the instruction before/after the passed
785/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
786/// stack adjustment is returned as a positive value for ADD and a negative for
787/// SUB.
788static int mergeSPUpdates(MachineBasicBlock &MBB,
789                           MachineBasicBlock::iterator &MBBI,
790                           unsigned StackPtr,
791                           bool doMergeWithPrevious) {
792  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
793      (!doMergeWithPrevious && MBBI == MBB.end()))
794    return 0;
795
796  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
797  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
798  unsigned Opc = PI->getOpcode();
799  int Offset = 0;
800
801  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
802       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
803      PI->getOperand(0).getReg() == StackPtr){
804    Offset += PI->getOperand(2).getImm();
805    MBB.erase(PI);
806    if (!doMergeWithPrevious) MBBI = NI;
807  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
808              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
809             PI->getOperand(0).getReg() == StackPtr) {
810    Offset -= PI->getOperand(2).getImm();
811    MBB.erase(PI);
812    if (!doMergeWithPrevious) MBBI = NI;
813  }
814
815  return Offset;
816}
817
818void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
819                                                MCSymbol *Label,
820                                                unsigned FramePtr) const {
821  MachineFrameInfo *MFI = MF.getFrameInfo();
822  MachineModuleInfo &MMI = MF.getMMI();
823
824  // Add callee saved registers to move list.
825  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
826  if (CSI.empty()) return;
827
828  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
829  const TargetData *TD = MF.getTarget().getTargetData();
830  bool HasFP = hasFP(MF);
831
832  // Calculate amount of bytes used for return address storing.
833  int stackGrowth =
834    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
835     TargetFrameInfo::StackGrowsUp ?
836     TD->getPointerSize() : -TD->getPointerSize());
837
838  // FIXME: This is dirty hack. The code itself is pretty mess right now.
839  // It should be rewritten from scratch and generalized sometimes.
840
841  // Determine maximum offset (minumum due to stack growth).
842  int64_t MaxOffset = 0;
843  for (std::vector<CalleeSavedInfo>::const_iterator
844         I = CSI.begin(), E = CSI.end(); I != E; ++I)
845    MaxOffset = std::min(MaxOffset,
846                         MFI->getObjectOffset(I->getFrameIdx()));
847
848  // Calculate offsets.
849  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
850  for (std::vector<CalleeSavedInfo>::const_iterator
851         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
852    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
853    unsigned Reg = I->getReg();
854    Offset = MaxOffset - Offset + saveAreaOffset;
855
856    // Don't output a new machine move if we're re-saving the frame
857    // pointer. This happens when the PrologEpilogInserter has inserted an extra
858    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
859    // generates one when frame pointers are used. If we generate a "machine
860    // move" for this extra "PUSH", the linker will lose track of the fact that
861    // the frame pointer should have the value of the first "PUSH" when it's
862    // trying to unwind.
863    //
864    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
865    //        another bug. I.e., one where we generate a prolog like this:
866    //
867    //          pushl  %ebp
868    //          movl   %esp, %ebp
869    //          pushl  %ebp
870    //          pushl  %esi
871    //           ...
872    //
873    //        The immediate re-push of EBP is unnecessary. At the least, it's an
874    //        optimization bug. EBP can be used as a scratch register in certain
875    //        cases, but probably not when we have a frame pointer.
876    if (HasFP && FramePtr == Reg)
877      continue;
878
879    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
880    MachineLocation CSSrc(Reg);
881    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
882  }
883}
884
885/// emitPrologue - Push callee-saved registers onto the stack, which
886/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
887/// space for local variables. Also emit labels used by the exception handler to
888/// generate the exception handling frames.
889void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
890  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
891  MachineBasicBlock::iterator MBBI = MBB.begin();
892  MachineFrameInfo *MFI = MF.getFrameInfo();
893  const Function *Fn = MF.getFunction();
894  const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
895  MachineModuleInfo &MMI = MF.getMMI();
896  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
897  bool needsFrameMoves = MMI.hasDebugInfo() ||
898                          !Fn->doesNotThrow() || UnwindTablesMandatory;
899  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
900  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
901  bool HasFP = hasFP(MF);
902  DebugLoc DL;
903
904  // Add RETADDR move area to callee saved frame size.
905  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
906  if (TailCallReturnAddrDelta < 0)
907    X86FI->setCalleeSavedFrameSize(
908      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
909
910  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
911  // function, and use up to 128 bytes of stack space, don't have a frame
912  // pointer, calls, or dynamic alloca then we do not need to adjust the
913  // stack pointer (we fit in the Red Zone).
914  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
915      !needsStackRealignment(MF) &&
916      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
917      !MFI->adjustsStack() &&                      // No calls.
918      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
919    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
920    if (HasFP) MinSize += SlotSize;
921    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
922    MFI->setStackSize(StackSize);
923  } else if (Subtarget->isTargetWin64()) {
924    // We need to always allocate 32 bytes as register spill area.
925    // FIXME: We might reuse these 32 bytes for leaf functions.
926    StackSize += 32;
927    MFI->setStackSize(StackSize);
928  }
929
930  // Insert stack pointer adjustment for later moving of return addr.  Only
931  // applies to tail call optimized functions where the callee argument stack
932  // size is bigger than the callers.
933  if (TailCallReturnAddrDelta < 0) {
934    MachineInstr *MI =
935      BuildMI(MBB, MBBI, DL,
936              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
937              StackPtr)
938        .addReg(StackPtr)
939        .addImm(-TailCallReturnAddrDelta);
940    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
941  }
942
943  // Mapping for machine moves:
944  //
945  //   DST: VirtualFP AND
946  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
947  //        ELSE                        => DW_CFA_def_cfa
948  //
949  //   SRC: VirtualFP AND
950  //        DST: Register               => DW_CFA_def_cfa_register
951  //
952  //   ELSE
953  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
954  //        REG < 64                    => DW_CFA_offset + Reg
955  //        ELSE                        => DW_CFA_offset_extended
956
957  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
958  const TargetData *TD = MF.getTarget().getTargetData();
959  uint64_t NumBytes = 0;
960  int stackGrowth = -TD->getPointerSize();
961
962  if (HasFP) {
963    // Calculate required stack adjustment.
964    uint64_t FrameSize = StackSize - SlotSize;
965    if (needsStackRealignment(MF))
966      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
967
968    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
969
970    // Get the offset of the stack slot for the EBP register, which is
971    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
972    // Update the frame offset adjustment.
973    MFI->setOffsetAdjustment(-NumBytes);
974
975    // Save EBP/RBP into the appropriate stack slot.
976    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
977      .addReg(FramePtr, RegState::Kill);
978
979    if (needsFrameMoves) {
980      // Mark the place where EBP/RBP was saved.
981      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
982      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
983
984      // Define the current CFA rule to use the provided offset.
985      if (StackSize) {
986        MachineLocation SPDst(MachineLocation::VirtualFP);
987        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
988        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
989      } else {
990        // FIXME: Verify & implement for FP
991        MachineLocation SPDst(StackPtr);
992        MachineLocation SPSrc(StackPtr, stackGrowth);
993        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
994      }
995
996      // Change the rule for the FramePtr to be an "offset" rule.
997      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
998      MachineLocation FPSrc(FramePtr);
999      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
1000    }
1001
1002    // Update EBP with the new base value...
1003    BuildMI(MBB, MBBI, DL,
1004            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
1005        .addReg(StackPtr);
1006
1007    if (needsFrameMoves) {
1008      // Mark effective beginning of when frame pointer becomes valid.
1009      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
1010      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
1011
1012      // Define the current CFA to use the EBP/RBP register.
1013      MachineLocation FPDst(FramePtr);
1014      MachineLocation FPSrc(MachineLocation::VirtualFP);
1015      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
1016    }
1017
1018    // Mark the FramePtr as live-in in every block except the entry.
1019    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
1020         I != E; ++I)
1021      I->addLiveIn(FramePtr);
1022
1023    // Realign stack
1024    if (needsStackRealignment(MF)) {
1025      MachineInstr *MI =
1026        BuildMI(MBB, MBBI, DL,
1027                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
1028                StackPtr).addReg(StackPtr).addImm(-MaxAlign);
1029
1030      // The EFLAGS implicit def is dead.
1031      MI->getOperand(3).setIsDead();
1032    }
1033  } else {
1034    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
1035  }
1036
1037  // Skip the callee-saved push instructions.
1038  bool PushedRegs = false;
1039  int StackOffset = 2 * stackGrowth;
1040
1041  while (MBBI != MBB.end() &&
1042         (MBBI->getOpcode() == X86::PUSH32r ||
1043          MBBI->getOpcode() == X86::PUSH64r)) {
1044    PushedRegs = true;
1045    ++MBBI;
1046
1047    if (!HasFP && needsFrameMoves) {
1048      // Mark callee-saved push instruction.
1049      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
1050      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
1051
1052      // Define the current CFA rule to use the provided offset.
1053      unsigned Ptr = StackSize ?
1054        MachineLocation::VirtualFP : StackPtr;
1055      MachineLocation SPDst(Ptr);
1056      MachineLocation SPSrc(Ptr, StackOffset);
1057      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
1058      StackOffset += stackGrowth;
1059    }
1060  }
1061
1062  DL = MBB.findDebugLoc(MBBI);
1063
1064  // Adjust stack pointer: ESP -= numbytes.
1065  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
1066    // Check, whether EAX is livein for this function.
1067    bool isEAXAlive = false;
1068    for (MachineRegisterInfo::livein_iterator
1069           II = MF.getRegInfo().livein_begin(),
1070           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
1071      unsigned Reg = II->first;
1072      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
1073                    Reg == X86::AH || Reg == X86::AL);
1074    }
1075
1076    // Function prologue calls _alloca to probe the stack when allocating more
1077    // than 4k bytes in one go. Touching the stack at 4K increments is necessary
1078    // to ensure that the guard pages used by the OS virtual memory manager are
1079    // allocated in correct sequence.
1080    if (!isEAXAlive) {
1081      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1082        .addImm(NumBytes);
1083      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
1084        .addExternalSymbol("_alloca")
1085        .addReg(StackPtr, RegState::Define | RegState::Implicit);
1086    } else {
1087      // Save EAX
1088      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1089        .addReg(X86::EAX, RegState::Kill);
1090
1091      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
1092      // allocated bytes for EAX.
1093      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1094        .addImm(NumBytes - 4);
1095      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
1096        .addExternalSymbol("_alloca")
1097        .addReg(StackPtr, RegState::Define | RegState::Implicit);
1098
1099      // Restore EAX
1100      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
1101                                              X86::EAX),
1102                                      StackPtr, false, NumBytes - 4);
1103      MBB.insert(MBBI, MI);
1104    }
1105  } else if (NumBytes) {
1106    // If there is an SUB32ri of ESP immediately before this instruction, merge
1107    // the two. This can be the case when tail call elimination is enabled and
1108    // the callee has more arguments then the caller.
1109    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
1110
1111    // If there is an ADD32ri or SUB32ri of ESP immediately after this
1112    // instruction, merge the two instructions.
1113    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
1114
1115    if (NumBytes)
1116      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
1117  }
1118
1119  if ((NumBytes || PushedRegs) && needsFrameMoves) {
1120    // Mark end of stack pointer adjustment.
1121    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
1122    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
1123
1124    if (!HasFP && NumBytes) {
1125      // Define the current CFA rule to use the provided offset.
1126      if (StackSize) {
1127        MachineLocation SPDst(MachineLocation::VirtualFP);
1128        MachineLocation SPSrc(MachineLocation::VirtualFP,
1129                              -StackSize + stackGrowth);
1130        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
1131      } else {
1132        // FIXME: Verify & implement for FP
1133        MachineLocation SPDst(StackPtr);
1134        MachineLocation SPSrc(StackPtr, stackGrowth);
1135        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
1136      }
1137    }
1138
1139    // Emit DWARF info specifying the offsets of the callee-saved registers.
1140    if (PushedRegs)
1141      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
1142  }
1143}
1144
1145void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
1146                                   MachineBasicBlock &MBB) const {
1147  const MachineFrameInfo *MFI = MF.getFrameInfo();
1148  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1149  MachineBasicBlock::iterator MBBI = prior(MBB.end());
1150  unsigned RetOpcode = MBBI->getOpcode();
1151  DebugLoc DL = MBBI->getDebugLoc();
1152
1153  switch (RetOpcode) {
1154  default:
1155    llvm_unreachable("Can only insert epilog into returning blocks");
1156  case X86::RET:
1157  case X86::RETI:
1158  case X86::TCRETURNdi:
1159  case X86::TCRETURNri:
1160  case X86::TCRETURNmi:
1161  case X86::TCRETURNdi64:
1162  case X86::TCRETURNri64:
1163  case X86::TCRETURNmi64:
1164  case X86::EH_RETURN:
1165  case X86::EH_RETURN64:
1166    break;  // These are ok
1167  }
1168
1169  // Get the number of bytes to allocate from the FrameInfo.
1170  uint64_t StackSize = MFI->getStackSize();
1171  uint64_t MaxAlign  = MFI->getMaxAlignment();
1172  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1173  uint64_t NumBytes = 0;
1174
1175  if (hasFP(MF)) {
1176    // Calculate required stack adjustment.
1177    uint64_t FrameSize = StackSize - SlotSize;
1178    if (needsStackRealignment(MF))
1179      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
1180
1181    NumBytes = FrameSize - CSSize;
1182
1183    // Pop EBP.
1184    BuildMI(MBB, MBBI, DL,
1185            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
1186  } else {
1187    NumBytes = StackSize - CSSize;
1188  }
1189
1190  // Skip the callee-saved pop instructions.
1191  MachineBasicBlock::iterator LastCSPop = MBBI;
1192  while (MBBI != MBB.begin()) {
1193    MachineBasicBlock::iterator PI = prior(MBBI);
1194    unsigned Opc = PI->getOpcode();
1195
1196    if (Opc != X86::POP32r && Opc != X86::POP64r &&
1197        !PI->getDesc().isTerminator())
1198      break;
1199
1200    --MBBI;
1201  }
1202
1203  DL = MBBI->getDebugLoc();
1204
1205  // If there is an ADD32ri or SUB32ri of ESP immediately before this
1206  // instruction, merge the two instructions.
1207  if (NumBytes || MFI->hasVarSizedObjects())
1208    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
1209
1210  // If dynamic alloca is used, then reset esp to point to the last callee-saved
1211  // slot before popping them off! Same applies for the case, when stack was
1212  // realigned.
1213  if (needsStackRealignment(MF)) {
1214    // We cannot use LEA here, because stack pointer was realigned. We need to
1215    // deallocate local frame back.
1216    if (CSSize) {
1217      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
1218      MBBI = prior(LastCSPop);
1219    }
1220
1221    BuildMI(MBB, MBBI, DL,
1222            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
1223            StackPtr).addReg(FramePtr);
1224  } else if (MFI->hasVarSizedObjects()) {
1225    if (CSSize) {
1226      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
1227      MachineInstr *MI =
1228        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
1229                     FramePtr, false, -CSSize);
1230      MBB.insert(MBBI, MI);
1231    } else {
1232      BuildMI(MBB, MBBI, DL,
1233              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
1234        .addReg(FramePtr);
1235    }
1236  } else if (NumBytes) {
1237    // Adjust stack pointer back: ESP += numbytes.
1238    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
1239  }
1240
1241  // We're returning from function via eh_return.
1242  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
1243    MBBI = prior(MBB.end());
1244    MachineOperand &DestAddr  = MBBI->getOperand(0);
1245    assert(DestAddr.isReg() && "Offset should be in register!");
1246    BuildMI(MBB, MBBI, DL,
1247            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
1248            StackPtr).addReg(DestAddr.getReg());
1249  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
1250             RetOpcode == X86::TCRETURNmi ||
1251             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
1252             RetOpcode == X86::TCRETURNmi64) {
1253    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
1254    // Tail call return: adjust the stack pointer and jump to callee.
1255    MBBI = prior(MBB.end());
1256    MachineOperand &JumpTarget = MBBI->getOperand(0);
1257    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
1258    assert(StackAdjust.isImm() && "Expecting immediate value.");
1259
1260    // Adjust stack pointer.
1261    int StackAdj = StackAdjust.getImm();
1262    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
1263    int Offset = 0;
1264    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
1265
1266    // Incoporate the retaddr area.
1267    Offset = StackAdj-MaxTCDelta;
1268    assert(Offset >= 0 && "Offset should never be negative");
1269
1270    if (Offset) {
1271      // Check for possible merge with preceeding ADD instruction.
1272      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
1273      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
1274    }
1275
1276    // Jump to label or value in register.
1277    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
1278      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
1279                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
1280        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
1281                         JumpTarget.getTargetFlags());
1282    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
1283      MachineInstrBuilder MIB =
1284        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
1285                                       ? X86::TAILJMPm : X86::TAILJMPm64));
1286      for (unsigned i = 0; i != 5; ++i)
1287        MIB.addOperand(MBBI->getOperand(i));
1288    } else if (RetOpcode == X86::TCRETURNri64) {
1289      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
1290        addReg(JumpTarget.getReg(), RegState::Kill);
1291    } else {
1292      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
1293        addReg(JumpTarget.getReg(), RegState::Kill);
1294    }
1295
1296    MachineInstr *NewMI = prior(MBBI);
1297    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
1298      NewMI->addOperand(MBBI->getOperand(i));
1299
1300    // Delete the pseudo instruction TCRETURN.
1301    MBB.erase(MBBI);
1302  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
1303             (X86FI->getTCReturnAddrDelta() < 0)) {
1304    // Add the return addr area delta back since we are not tail calling.
1305    int delta = -1*X86FI->getTCReturnAddrDelta();
1306    MBBI = prior(MBB.end());
1307
1308    // Check for possible merge with preceeding ADD instruction.
1309    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
1310    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
1311  }
1312}
1313
1314unsigned X86RegisterInfo::getRARegister() const {
1315  return Is64Bit ? X86::RIP     // Should have dwarf #16.
1316                 : X86::EIP;    // Should have dwarf #8.
1317}
1318
1319unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
1320  return hasFP(MF) ? FramePtr : StackPtr;
1321}
1322
1323void
1324X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
1325  // Calculate amount of bytes used for return address storing
1326  int stackGrowth = (Is64Bit ? -8 : -4);
1327
1328  // Initial state of the frame pointer is esp+stackGrowth.
1329  MachineLocation Dst(MachineLocation::VirtualFP);
1330  MachineLocation Src(StackPtr, stackGrowth);
1331  Moves.push_back(MachineMove(0, Dst, Src));
1332
1333  // Add return address to move list
1334  MachineLocation CSDst(StackPtr, stackGrowth);
1335  MachineLocation CSSrc(getRARegister());
1336  Moves.push_back(MachineMove(0, CSDst, CSSrc));
1337}
1338
1339unsigned X86RegisterInfo::getEHExceptionRegister() const {
1340  llvm_unreachable("What is the exception register");
1341  return 0;
1342}
1343
1344unsigned X86RegisterInfo::getEHHandlerRegister() const {
1345  llvm_unreachable("What is the exception handler register");
1346  return 0;
1347}
1348
1349namespace llvm {
1350unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
1351  switch (VT.getSimpleVT().SimpleTy) {
1352  default: return Reg;
1353  case MVT::i8:
1354    if (High) {
1355      switch (Reg) {
1356      default: return 0;
1357      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
1358        return X86::AH;
1359      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
1360        return X86::DH;
1361      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
1362        return X86::CH;
1363      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
1364        return X86::BH;
1365      }
1366    } else {
1367      switch (Reg) {
1368      default: return 0;
1369      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
1370        return X86::AL;
1371      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
1372        return X86::DL;
1373      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
1374        return X86::CL;
1375      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
1376        return X86::BL;
1377      case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
1378        return X86::SIL;
1379      case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
1380        return X86::DIL;
1381      case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
1382        return X86::BPL;
1383      case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
1384        return X86::SPL;
1385      case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
1386        return X86::R8B;
1387      case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
1388        return X86::R9B;
1389      case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
1390        return X86::R10B;
1391      case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
1392        return X86::R11B;
1393      case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
1394        return X86::R12B;
1395      case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
1396        return X86::R13B;
1397      case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
1398        return X86::R14B;
1399      case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
1400        return X86::R15B;
1401      }
1402    }
1403  case MVT::i16:
1404    switch (Reg) {
1405    default: return Reg;
1406    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
1407      return X86::AX;
1408    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
1409      return X86::DX;
1410    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
1411      return X86::CX;
1412    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
1413      return X86::BX;
1414    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
1415      return X86::SI;
1416    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
1417      return X86::DI;
1418    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
1419      return X86::BP;
1420    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
1421      return X86::SP;
1422    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
1423      return X86::R8W;
1424    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
1425      return X86::R9W;
1426    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
1427      return X86::R10W;
1428    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
1429      return X86::R11W;
1430    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
1431      return X86::R12W;
1432    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
1433      return X86::R13W;
1434    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
1435      return X86::R14W;
1436    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
1437      return X86::R15W;
1438    }
1439  case MVT::i32:
1440    switch (Reg) {
1441    default: return Reg;
1442    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
1443      return X86::EAX;
1444    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
1445      return X86::EDX;
1446    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
1447      return X86::ECX;
1448    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
1449      return X86::EBX;
1450    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
1451      return X86::ESI;
1452    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
1453      return X86::EDI;
1454    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
1455      return X86::EBP;
1456    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
1457      return X86::ESP;
1458    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
1459      return X86::R8D;
1460    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
1461      return X86::R9D;
1462    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
1463      return X86::R10D;
1464    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
1465      return X86::R11D;
1466    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
1467      return X86::R12D;
1468    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
1469      return X86::R13D;
1470    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
1471      return X86::R14D;
1472    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
1473      return X86::R15D;
1474    }
1475  case MVT::i64:
1476    switch (Reg) {
1477    default: return Reg;
1478    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
1479      return X86::RAX;
1480    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
1481      return X86::RDX;
1482    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
1483      return X86::RCX;
1484    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
1485      return X86::RBX;
1486    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
1487      return X86::RSI;
1488    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
1489      return X86::RDI;
1490    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
1491      return X86::RBP;
1492    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
1493      return X86::RSP;
1494    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
1495      return X86::R8;
1496    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
1497      return X86::R9;
1498    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
1499      return X86::R10;
1500    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
1501      return X86::R11;
1502    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
1503      return X86::R12;
1504    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
1505      return X86::R13;
1506    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
1507      return X86::R14;
1508    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
1509      return X86::R15;
1510    }
1511  }
1512
1513  return Reg;
1514}
1515}
1516
1517#include "X86GenRegisterInfo.inc"
1518
1519namespace {
1520  struct MSAH : public MachineFunctionPass {
1521    static char ID;
1522    MSAH() : MachineFunctionPass(&ID) {}
1523
1524    virtual bool runOnMachineFunction(MachineFunction &MF) {
1525      const X86TargetMachine *TM =
1526        static_cast<const X86TargetMachine *>(&MF.getTarget());
1527      const X86RegisterInfo *X86RI = TM->getRegisterInfo();
1528      MachineRegisterInfo &RI = MF.getRegInfo();
1529      X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1530      unsigned StackAlignment = X86RI->getStackAlignment();
1531
1532      // Be over-conservative: scan over all vreg defs and find whether vector
1533      // registers are used. If yes, there is a possibility that vector register
1534      // will be spilled and thus require dynamic stack realignment.
1535      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
1536           RegNum < RI.getLastVirtReg(); ++RegNum)
1537        if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
1538          FuncInfo->setReserveFP(true);
1539          return true;
1540        }
1541
1542      // Nothing to do
1543      return false;
1544    }
1545
1546    virtual const char *getPassName() const {
1547      return "X86 Maximal Stack Alignment Check";
1548    }
1549
1550    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
1551      AU.setPreservesCFG();
1552      MachineFunctionPass::getAnalysisUsage(AU);
1553    }
1554  };
1555
1556  char MSAH::ID = 0;
1557}
1558
1559FunctionPass*
1560llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
1561