X86FastISel.cpp revision 29689434635364346cbef6f4e309f1d9fcdd5d80
1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the X86-specific support for the FastISel class. Much
11// of the target-specific code is generated by tablegen in the file
12// X86GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86RegisterInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/CallingConv.h"
23#include "llvm/DerivedTypes.h"
24#include "llvm/GlobalVariable.h"
25#include "llvm/Instructions.h"
26#include "llvm/IntrinsicInst.h"
27#include "llvm/CodeGen/FastISel.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Support/CallSite.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/GetElementPtrTypeIterator.h"
34#include "llvm/Target/TargetOptions.h"
35using namespace llvm;
36
37namespace {
38
39class X86FastISel : public FastISel {
40  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
41  /// make the right decision when generating code for different targets.
42  const X86Subtarget *Subtarget;
43
44  /// StackPtr - Register used as the stack pointer.
45  ///
46  unsigned StackPtr;
47
48  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
49  /// floating point ops.
50  /// When SSE is available, use it for f32 operations.
51  /// When SSE2 is available, use it for f64 operations.
52  bool X86ScalarSSEf64;
53  bool X86ScalarSSEf32;
54
55public:
56  explicit X86FastISel(MachineFunction &mf,
57                       MachineModuleInfo *mmi,
58                       DwarfWriter *dw,
59                       DenseMap<const Value *, unsigned> &vm,
60                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
61                       DenseMap<const AllocaInst *, int> &am
62#ifndef NDEBUG
63                       , SmallSet<Instruction*, 8> &cil
64#endif
65                       )
66    : FastISel(mf, mmi, dw, vm, bm, am
67#ifndef NDEBUG
68               , cil
69#endif
70               ) {
71    Subtarget = &TM.getSubtarget<X86Subtarget>();
72    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
73    X86ScalarSSEf64 = Subtarget->hasSSE2();
74    X86ScalarSSEf32 = Subtarget->hasSSE1();
75  }
76
77  virtual bool TargetSelectInstruction(Instruction *I);
78
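// Tablegen-generated FastISel routines (the FastEmit_* helpers for the X86
// instruction set) are pulled in here.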
79#include "X86GenFastISel.inc"
80
81private:
82  bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT);
83
84  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
85
86  bool X86FastEmitStore(EVT VT, Value *Val,
87                        const X86AddressMode &AM);
88  bool X86FastEmitStore(EVT VT, unsigned Val,
89                        const X86AddressMode &AM);
90
91  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
92                         unsigned &ResultReg);
93
94  bool X86SelectAddress(Value *V, X86AddressMode &AM);
95  bool X86SelectCallAddress(Value *V, X86AddressMode &AM);
96
97  bool X86SelectLoad(Instruction *I);
98
99  bool X86SelectStore(Instruction *I);
100
101  bool X86SelectCmp(Instruction *I);
102
103  bool X86SelectZExt(Instruction *I);
104
105  bool X86SelectBranch(Instruction *I);
106
107  bool X86SelectShift(Instruction *I);
108
109  bool X86SelectSelect(Instruction *I);
110
111  bool X86SelectTrunc(Instruction *I);
112
113  bool X86SelectFPExt(Instruction *I);
114  bool X86SelectFPTrunc(Instruction *I);
115
116  bool X86SelectExtractValue(Instruction *I);
117
118  bool X86VisitIntrinsicCall(IntrinsicInst &I);
119  bool X86SelectCall(Instruction *I);
120
121  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
122
123  const X86InstrInfo *getInstrInfo() const {
124    return getTargetMachine()->getInstrInfo();
125  }
126  const X86TargetMachine *getTargetMachine() const {
127    return static_cast<const X86TargetMachine *>(&TM);
128  }
129
130  unsigned TargetMaterializeConstant(Constant *C);
131
132  unsigned TargetMaterializeAlloca(AllocaInst *C);
133
134  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
135  /// computed in an SSE register, not on the X87 floating point stack.
136  bool isScalarFPTypeInSSEReg(EVT VT) const {
137    return (VT == MVT::f64 && X86ScalarSSEf64) ||  // f64 when SSE2 is available
138           (VT == MVT::f32 && X86ScalarSSEf32);    // f32 when SSE1 is available
139  }
140
141  bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
142};
143
144} // end anonymous namespace.
145
146bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
147  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
148  if (VT == MVT::Other || !VT.isSimple())
149    // Unhandled type. Halt "fast" selection and bail.
150    return false;
151
152  // For now, require SSE/SSE2 for performing floating-point operations,
153  // since x87 requires additional work.
154  if (VT == MVT::f64 && !X86ScalarSSEf64)
155     return false;
156  if (VT == MVT::f32 && !X86ScalarSSEf32)
157     return false;
158  // Similarly, no f80 support yet.
159  if (VT == MVT::f80)
160    return false;
161  // We only handle legal types. For example, on x86-32 the instruction
162  // selector contains all of the 64-bit instructions from x86-64,
163  // under the assumption that i64 won't be used if the target doesn't
164  // support it.
165  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
166}
167
168#include "X86GenCallingConv.inc"
169
170/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
171/// convention.
172CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
173                                           bool isTailCall) {
174  if (Subtarget->is64Bit()) {
175    if (CC == CallingConv::GHC)
176      return CC_X86_64_GHC;
177    else if (Subtarget->isTargetWin64())
178      return CC_X86_Win64_C;
179    else
180      return CC_X86_64_C;
181  }
182
183  if (CC == CallingConv::X86_FastCall)
184    return CC_X86_32_FastCall;
185  else if (CC == CallingConv::Fast)
186    return CC_X86_32_FastCC;
187  else if (CC == CallingConv::GHC)
188    return CC_X86_32_GHC;
189  else
190    return CC_X86_32_C;
191}
192
193/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
194/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
195/// Return true and the result register by reference if it is possible.
196bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
197                                  unsigned &ResultReg) {
198  // Get opcode and regclass of the output for the given load instruction.
199  unsigned Opc = 0;
200  const TargetRegisterClass *RC = NULL;
201  switch (VT.getSimpleVT().SimpleTy) {
202  default: return false;
203  case MVT::i1:
204  case MVT::i8:
205    Opc = X86::MOV8rm;
206    RC  = X86::GR8RegisterClass;
207    break;
208  case MVT::i16:
209    Opc = X86::MOV16rm;
210    RC  = X86::GR16RegisterClass;
211    break;
212  case MVT::i32:
213    Opc = X86::MOV32rm;
214    RC  = X86::GR32RegisterClass;
215    break;
216  case MVT::i64:
217    // Must be in x86-64 mode.
218    Opc = X86::MOV64rm;
219    RC  = X86::GR64RegisterClass;
220    break;
221  case MVT::f32:
222    if (Subtarget->hasSSE1()) {
223      Opc = X86::MOVSSrm;
224      RC  = X86::FR32RegisterClass;
225    } else {
226      Opc = X86::LD_Fp32m;
227      RC  = X86::RFP32RegisterClass;
228    }
229    break;
230  case MVT::f64:
231    if (Subtarget->hasSSE2()) {
232      Opc = X86::MOVSDrm;
233      RC  = X86::FR64RegisterClass;
234    } else {
235      Opc = X86::LD_Fp64m;
236      RC  = X86::RFP64RegisterClass;
237    }
238    break;
239  case MVT::f80:
240    // No f80 support yet.
241    return false;
242  }
243
244  ResultReg = createResultReg(RC);
245  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
246  return true;
247}
248
249/// X86FastEmitStore - Emit a machine instruction to store a value Val of
250/// type VT. The address is either pre-computed, consisting of a base pointer
251/// and a displacement offset, or a GlobalAddress.
252/// Return true if it is possible.
253bool
254X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
255                              const X86AddressMode &AM) {
256  // Get opcode and regclass of the output for the given store instruction.
257  unsigned Opc = 0;
258  switch (VT.getSimpleVT().SimpleTy) {
259  case MVT::f80: // No f80 support yet.
260  default: return false;
261  case MVT::i1: {
262    // Mask out all but the lowest bit.
263    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
264    BuildMI(MBB, DL,
265            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
266    Val = AndResult;
267  }
268  // FALLTHROUGH, handling i1 as i8.
269  case MVT::i8:  Opc = X86::MOV8mr;  break;
270  case MVT::i16: Opc = X86::MOV16mr; break;
271  case MVT::i32: Opc = X86::MOV32mr; break;
272  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
273  case MVT::f32:
274    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
275    break;
276  case MVT::f64:
277    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
278    break;
279  }
280
281  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
282  return true;
283}
284
285bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
286                                   const X86AddressMode &AM) {
287  // Handle 'null' like i32/i64 0.
288  if (isa<ConstantPointerNull>(Val))
289    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
290
291  // If this is a store of a simple constant, fold the constant into the store.
292  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
293    unsigned Opc = 0;
294    bool Signed = true;
295    switch (VT.getSimpleVT().SimpleTy) {
296    default: break;
297    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
298    case MVT::i8:  Opc = X86::MOV8mi;  break;
299    case MVT::i16: Opc = X86::MOV16mi; break;
300    case MVT::i32: Opc = X86::MOV32mi; break;
301    case MVT::i64:
302      // Must be a 32-bit sign extended value.
303      if ((int)CI->getSExtValue() == CI->getSExtValue())
304        Opc = X86::MOV64mi32;
305      break;
306    }
307
308    if (Opc) {
309      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
310                             .addImm(Signed ? CI->getSExtValue() :
311                                              CI->getZExtValue());
312      return true;
313    }
314  }
315
316  unsigned ValReg = getRegForValue(Val);
317  if (ValReg == 0)
318    return false;
319
320  return X86FastEmitStore(VT, ValReg, AM);
321}
322
323/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
324/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
325/// ISD::SIGN_EXTEND).
326bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
327                                    unsigned Src, EVT SrcVT,
328                                    unsigned &ResultReg) {
329  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
330
331  if (RR != 0) {
332    ResultReg = RR;
333    return true;
334  } else
335    return false;
336}
337
338/// X86SelectAddress - Attempt to fill in an address from the given value.
339///
340bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
341  User *U = NULL;
342  unsigned Opcode = Instruction::UserOp1;
343  if (Instruction *I = dyn_cast<Instruction>(V)) {
344    Opcode = I->getOpcode();
345    U = I;
346  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
347    Opcode = C->getOpcode();
348    U = C;
349  }
350
351  switch (Opcode) {
352  default: break;
353  case Instruction::BitCast:
354    // Look past bitcasts.
355    return X86SelectAddress(U->getOperand(0), AM);
356
357  case Instruction::IntToPtr:
358    // Look past no-op inttoptrs.
359    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
360      return X86SelectAddress(U->getOperand(0), AM);
361    break;
362
363  case Instruction::PtrToInt:
364    // Look past no-op ptrtoints.
365    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
366      return X86SelectAddress(U->getOperand(0), AM);
367    break;
368
369  case Instruction::Alloca: {
370    // Do static allocas.
371    const AllocaInst *A = cast<AllocaInst>(V);
372    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
373    if (SI != StaticAllocaMap.end()) {
374      AM.BaseType = X86AddressMode::FrameIndexBase;
375      AM.Base.FrameIndex = SI->second;
376      return true;
377    }
378    break;
379  }
380
381  case Instruction::Add: {
382    // Adds of constants are common and easy enough.
383    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
384      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
385      // They have to fit in the 32-bit signed displacement field though.
386      if (isInt32(Disp)) {
387        AM.Disp = (uint32_t)Disp;
388        return X86SelectAddress(U->getOperand(0), AM);
389      }
390    }
391    break;
392  }
393
394  case Instruction::GetElementPtr: {
395    X86AddressMode SavedAM = AM;
396
397    // Pattern-match simple GEPs.
398    uint64_t Disp = (int32_t)AM.Disp;
399    unsigned IndexReg = AM.IndexReg;
400    unsigned Scale = AM.Scale;
401    gep_type_iterator GTI = gep_type_begin(U);
402    // Iterate through the indices, folding what we can. Constants can be
403    // folded, and one dynamic index can be handled, if the scale is supported.
404    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
405         i != e; ++i, ++GTI) {
406      Value *Op = *i;
407      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
408        const StructLayout *SL = TD.getStructLayout(STy);
409        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
410        Disp += SL->getElementOffset(Idx);
411      } else {
412        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
413        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
414          // Constant-offset addressing.
415          Disp += CI->getSExtValue() * S;
416        } else if (IndexReg == 0 &&
417                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
418                   (S == 1 || S == 2 || S == 4 || S == 8)) {
419          // Scaled-index addressing.
420          Scale = S;
421          IndexReg = getRegForGEPIndex(Op);
422          if (IndexReg == 0)
423            return false;
424        } else
425          // Unsupported.
426          goto unsupported_gep;
427      }
428    }
429    // Check for displacement overflow.
430    if (!isInt32(Disp))
431      break;
432    // Ok, the GEP indices were covered by constant-offset and scaled-index
433    // addressing. Update the address state and move on to examining the base.
434    AM.IndexReg = IndexReg;
435    AM.Scale = Scale;
436    AM.Disp = (uint32_t)Disp;
437    if (X86SelectAddress(U->getOperand(0), AM))
438      return true;
439
440    // If we couldn't merge the sub value into this addr mode, revert back to
441    // our address and just match the value instead of completely failing.
442    AM = SavedAM;
443    break;
444  unsupported_gep:
445    // Ok, the GEP indices weren't all covered.
446    break;
447  }
448  }
449
450  // Handle constant address.
451  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
452    // Can't handle alternate code models yet.
453    if (TM.getCodeModel() != CodeModel::Small)
454      return false;
455
456    // RIP-relative addresses can't have additional register operands.
457    if (Subtarget->isPICStyleRIPRel() &&
458        (AM.Base.Reg != 0 || AM.IndexReg != 0))
459      return false;
460
461    // Can't handle TLS yet.
462    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
463      if (GVar->isThreadLocal())
464        return false;
465
466    // Okay, we've committed to selecting this global. Set up the basic address.
467    AM.GV = GV;
468
469    // Allow the subtarget to classify the global.
470    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
471
472    // If this reference is relative to the pic base, set it now.
473    if (isGlobalRelativeToPICBase(GVFlags)) {
474      // FIXME: How do we know Base.Reg is free??
475      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
476    }
477
478    // Unless the ABI requires an extra load, return a direct reference to
479    // the global.
480    if (!isGlobalStubReference(GVFlags)) {
481      if (Subtarget->isPICStyleRIPRel()) {
482        // Use rip-relative addressing if we can.  Above we verified that the
483        // base and index registers are unused.
484        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
485        AM.Base.Reg = X86::RIP;
486      }
487      AM.GVOpFlags = GVFlags;
488      return true;
489    }
490
491    // Ok, we need to do a load from a stub.  If we've already loaded from this
492    // stub, reuse the loaded pointer, otherwise emit the load now.
493    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
494    unsigned LoadReg;
495    if (I != LocalValueMap.end() && I->second != 0) {
496      LoadReg = I->second;
497    } else {
498      // Issue load from stub.
499      unsigned Opc = 0;
500      const TargetRegisterClass *RC = NULL;
501      X86AddressMode StubAM;
502      StubAM.Base.Reg = AM.Base.Reg;
503      StubAM.GV = GV;
504      StubAM.GVOpFlags = GVFlags;
505
506      if (TLI.getPointerTy() == MVT::i64) {
507        Opc = X86::MOV64rm;
508        RC  = X86::GR64RegisterClass;
509
510        if (Subtarget->isPICStyleRIPRel())
511          StubAM.Base.Reg = X86::RIP;
512      } else {
513        Opc = X86::MOV32rm;
514        RC  = X86::GR32RegisterClass;
515      }
516
517      LoadReg = createResultReg(RC);
518      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
519
520      // Prevent loading GV stub multiple times in same MBB.
521      LocalValueMap[V] = LoadReg;
522    }
523
524    // Now construct the final address. Note that the Disp, Scale,
525    // and Index values may already be set here.
526    AM.Base.Reg = LoadReg;
527    AM.GV = 0;
528    return true;
529  }
530
531  // If all else fails, try to materialize the value in a register.
532  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
533    if (AM.Base.Reg == 0) {
534      AM.Base.Reg = getRegForValue(V);
535      return AM.Base.Reg != 0;
536    }
537    if (AM.IndexReg == 0) {
538      assert(AM.Scale == 1 && "Scale with no index!");
539      AM.IndexReg = getRegForValue(V);
540      return AM.IndexReg != 0;
541    }
542  }
543
544  return false;
545}
546
547/// X86SelectCallAddress - Attempt to fill in an address from the given value.
548///
549bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
550  User *U = NULL;
551  unsigned Opcode = Instruction::UserOp1;
552  if (Instruction *I = dyn_cast<Instruction>(V)) {
553    Opcode = I->getOpcode();
554    U = I;
555  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
556    Opcode = C->getOpcode();
557    U = C;
558  }
559
560  switch (Opcode) {
561  default: break;
562  case Instruction::BitCast:
563    // Look past bitcasts.
564    return X86SelectCallAddress(U->getOperand(0), AM);
565
566  case Instruction::IntToPtr:
567    // Look past no-op inttoptrs.
568    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
569      return X86SelectCallAddress(U->getOperand(0), AM);
570    break;
571
572  case Instruction::PtrToInt:
573    // Look past no-op ptrtoints.
574    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
575      return X86SelectCallAddress(U->getOperand(0), AM);
576    break;
577  }
578
579  // Handle constant address.
580  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
581    // Can't handle alternate code models yet.
582    if (TM.getCodeModel() != CodeModel::Small)
583      return false;
584
585    // RIP-relative addresses can't have additional register operands.
586    if (Subtarget->isPICStyleRIPRel() &&
587        (AM.Base.Reg != 0 || AM.IndexReg != 0))
588      return false;
589
590    // Can't handle TLS or DLLImport.
591    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
592      if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
593        return false;
594
595    // Okay, we've committed to selecting this global. Set up the basic address.
596    AM.GV = GV;
597
598    // No ABI requires an extra load for anything other than DLLImport, which
599    // we rejected above. Return a direct reference to the global.
600    if (Subtarget->isPICStyleRIPRel()) {
601      // Use rip-relative addressing if we can.  Above we verified that the
602      // base and index registers are unused.
603      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
604      AM.Base.Reg = X86::RIP;
605    } else if (Subtarget->isPICStyleStubPIC()) {
606      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
607    } else if (Subtarget->isPICStyleGOT()) {
608      AM.GVOpFlags = X86II::MO_GOTOFF;
609    }
610
611    return true;
612  }
613
614  // If all else fails, try to materialize the value in a register.
615  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
616    if (AM.Base.Reg == 0) {
617      AM.Base.Reg = getRegForValue(V);
618      return AM.Base.Reg != 0;
619    }
620    if (AM.IndexReg == 0) {
621      assert(AM.Scale == 1 && "Scale with no index!");
622      AM.IndexReg = getRegForValue(V);
623      return AM.IndexReg != 0;
624    }
625  }
626
627  return false;
628}
629
630
631/// X86SelectStore - Select and emit code to implement store instructions.
632bool X86FastISel::X86SelectStore(Instruction* I) {
633  EVT VT;
634  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
635    return false;
636
637  X86AddressMode AM;
638  if (!X86SelectAddress(I->getOperand(1), AM))
639    return false;
640
641  return X86FastEmitStore(VT, I->getOperand(0), AM);
642}
643
644/// X86SelectLoad - Select and emit code to implement load instructions.
645///
646bool X86FastISel::X86SelectLoad(Instruction *I)  {
647  EVT VT;
648  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
649    return false;
650
651  X86AddressMode AM;
652  if (!X86SelectAddress(I->getOperand(0), AM))
653    return false;
654
655  unsigned ResultReg = 0;
656  if (X86FastEmitLoad(VT, AM, ResultReg)) {
657    UpdateValueMap(I, ResultReg);
658    return true;
659  }
660  return false;
661}
662
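/// X86ChooseCmpOpcode - Return the register-register compare instruction for
/// the given type, or 0 if the type is not supported.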
663static unsigned X86ChooseCmpOpcode(EVT VT) {
664  switch (VT.getSimpleVT().SimpleTy) {
665  default:       return 0;
666  case MVT::i8:  return X86::CMP8rr;
667  case MVT::i16: return X86::CMP16rr;
668  case MVT::i32: return X86::CMP32rr;
669  case MVT::i64: return X86::CMP64rr;
670  case MVT::f32: return X86::UCOMISSrr;
671  case MVT::f64: return X86::UCOMISDrr;
672  }
673}
674
675/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHSC as the
676/// right-hand side, return an opcode that folds the immediate into the
677/// compare (e.g. CMP32ri); otherwise return 0.
678static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
679  switch (VT.getSimpleVT().SimpleTy) {
680  // Otherwise, we can't fold the immediate into this comparison.
681  default: return 0;
682  case MVT::i8: return X86::CMP8ri;
683  case MVT::i16: return X86::CMP16ri;
684  case MVT::i32: return X86::CMP32ri;
685  case MVT::i64:
686    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
687    // field.
688    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
689      return X86::CMP64ri32;
690    return 0;
691  }
692}
693
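/// X86FastEmitCompare - Emit a compare of Op0 against Op1, folding Op1 into an
/// immediate operand when possible. The result is left in EFLAGS.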
694bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
695  unsigned Op0Reg = getRegForValue(Op0);
696  if (Op0Reg == 0) return false;
697
698  // Handle 'null' like i32/i64 0.
699  if (isa<ConstantPointerNull>(Op1))
700    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
701
702  // We have two options: compare with register or immediate.  If the RHS of
703  // the compare is an immediate that we can fold into this compare, use
704  // CMPri, otherwise use CMPrr.
705  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
706    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
707      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
708                                          .addImm(Op1C->getSExtValue());
709      return true;
710    }
711  }
712
713  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
714  if (CompareOpc == 0) return false;
715
716  unsigned Op1Reg = getRegForValue(Op1);
717  if (Op1Reg == 0) return false;
718  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
719
720  return true;
721}
722
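/// X86SelectCmp - Select and emit code for integer and floating-point compare
/// instructions, materializing the result in an i8 register via SETcc.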
723bool X86FastISel::X86SelectCmp(Instruction *I) {
724  CmpInst *CI = cast<CmpInst>(I);
725
726  EVT VT;
727  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
728    return false;
729
730  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
731  unsigned SetCCOpc;
732  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
733  switch (CI->getPredicate()) {
734  case CmpInst::FCMP_OEQ: {
735    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
736      return false;
737
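    // Ordered-equal is true only when ZF is set and PF is clear (unordered
    // operands set PF), so combine SETE and SETNP with an AND.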
738    unsigned EReg = createResultReg(&X86::GR8RegClass);
739    unsigned NPReg = createResultReg(&X86::GR8RegClass);
740    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
741    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
742    BuildMI(MBB, DL,
743            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
744    UpdateValueMap(I, ResultReg);
745    return true;
746  }
747  case CmpInst::FCMP_UNE: {
748    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
749      return false;
750
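    // Unordered-not-equal is true when ZF is clear or PF is set, so combine
    // SETNE and SETP with an OR.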
751    unsigned NEReg = createResultReg(&X86::GR8RegClass);
752    unsigned PReg = createResultReg(&X86::GR8RegClass);
753    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
754    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
755    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
756    UpdateValueMap(I, ResultReg);
757    return true;
758  }
759  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
760  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
761  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
762  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
763  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
764  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
765  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
766  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
767  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
768  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
769  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
770  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
771
772  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
773  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
774  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
775  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
776  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
777  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
778  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
779  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
780  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
781  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
782  default:
783    return false;
784  }
785
786  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
787  if (SwapArgs)
788    std::swap(Op0, Op1);
789
790  // Emit a compare of Op0/Op1.
791  if (!X86FastEmitCompare(Op0, Op1, VT))
792    return false;
793
794  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
795  UpdateValueMap(I, ResultReg);
796  return true;
797}
798
799bool X86FastISel::X86SelectZExt(Instruction *I) {
800  // Handle zero-extension from i1 to i8, which is common.
801  if (I->getType()->isIntegerTy(8) &&
802      I->getOperand(0)->getType()->isIntegerTy(1)) {
803    unsigned ResultReg = getRegForValue(I->getOperand(0));
804    if (ResultReg == 0) return false;
805    // Set the high bits to zero.
806    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
807    if (ResultReg == 0) return false;
808    UpdateValueMap(I, ResultReg);
809    return true;
810  }
811
812  return false;
813}
814
815
816bool X86FastISel::X86SelectBranch(Instruction *I) {
817  // Unconditional branches are selected by tablegen-generated code.
818  // Handle a conditional branch.
819  BranchInst *BI = cast<BranchInst>(I);
820  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
821  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
822
823  // Fold the common case of a conditional branch with a comparison.
824  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
825    if (CI->hasOneUse()) {
826      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
827
828      // Try to take advantage of fallthrough opportunities.
829      CmpInst::Predicate Predicate = CI->getPredicate();
830      if (MBB->isLayoutSuccessor(TrueMBB)) {
831        std::swap(TrueMBB, FalseMBB);
832        Predicate = CmpInst::getInversePredicate(Predicate);
833      }
834
835      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
836      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
837
838      switch (Predicate) {
839      case CmpInst::FCMP_OEQ:
840        std::swap(TrueMBB, FalseMBB);
841        Predicate = CmpInst::FCMP_UNE;
842        // FALL THROUGH
843      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
844      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
845      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
846      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
847      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
848      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
849      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
850      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
851      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
852      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
853      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
854      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
855      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
856
857      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
858      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
859      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
860      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
861      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
862      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
863      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
864      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
865      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
866      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
867      default:
868        return false;
869      }
870
871      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
872      if (SwapArgs)
873        std::swap(Op0, Op1);
874
875      // Emit a compare of the LHS and RHS, setting the flags.
876      if (!X86FastEmitCompare(Op0, Op1, VT))
877        return false;
878
879      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
880
881      if (Predicate == CmpInst::FCMP_UNE) {
882        // X86 requires a second branch to handle UNE (and OEQ,
883        // which is mapped to UNE above).
884        BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
885      }
886
887      FastEmitBranch(FalseMBB);
888      MBB->addSuccessor(TrueMBB);
889      return true;
890    }
891  } else if (ExtractValueInst *EI =
892             dyn_cast<ExtractValueInst>(BI->getCondition())) {
893    // Check to see if the branch instruction is from an "arithmetic with
894    // overflow" intrinsic. The main way these intrinsics are used is:
895    //
896    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
897    //   %sum = extractvalue { i32, i1 } %t, 0
898    //   %obit = extractvalue { i32, i1 } %t, 1
899    //   br i1 %obit, label %overflow, label %normal
900    //
901    // The %sum and %obit are converted into an ADD and a SETO/SETB before
902    // reaching the branch. Therefore, we search backwards through the MBB
903    // looking for the SETO/SETB instruction. If an instruction modifies the
904    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
905    // convert the branch into a JO/JB instruction.
906    if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
907      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
908          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
909        const MachineInstr *SetMI = 0;
910        unsigned Reg = lookUpRegForValue(EI);
911
912        for (MachineBasicBlock::const_reverse_iterator
913               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
914          const MachineInstr &MI = *RI;
915
916          if (MI.modifiesRegister(Reg)) {
917            unsigned Src, Dst, SrcSR, DstSR;
918
919            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
920              Reg = Src;
921              continue;
922            }
923
924            SetMI = &MI;
925            break;
926          }
927
928          const TargetInstrDesc &TID = MI.getDesc();
929          if (TID.hasUnmodeledSideEffects() ||
930              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
931            break;
932        }
933
934        if (SetMI) {
935          unsigned OpCode = SetMI->getOpcode();
936
937          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
938            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
939                                        X86::JO_4 : X86::JB_4))
940              .addMBB(TrueMBB);
941            FastEmitBranch(FalseMBB);
942            MBB->addSuccessor(TrueMBB);
943            return true;
944          }
945        }
946      }
947    }
948  }
949
950  // Otherwise do a clumsy setcc and re-test it.
951  unsigned OpReg = getRegForValue(BI->getCondition());
952  if (OpReg == 0) return false;
953
954  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
955  BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
956  FastEmitBranch(FalseMBB);
957  MBB->addSuccessor(TrueMBB);
958  return true;
959}
960
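/// X86SelectShift - Select and emit code for integer shift instructions,
/// folding a constant shift amount into the immediate form and otherwise
/// shifting by CL.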
961bool X86FastISel::X86SelectShift(Instruction *I) {
962  unsigned CReg = 0, OpReg = 0, OpImm = 0;
963  const TargetRegisterClass *RC = NULL;
964  if (I->getType()->isIntegerTy(8)) {
965    CReg = X86::CL;
966    RC = &X86::GR8RegClass;
967    switch (I->getOpcode()) {
968    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
969    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
970    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
971    default: return false;
972    }
973  } else if (I->getType()->isIntegerTy(16)) {
974    CReg = X86::CX;
975    RC = &X86::GR16RegClass;
976    switch (I->getOpcode()) {
977    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
978    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
979    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
980    default: return false;
981    }
982  } else if (I->getType()->isIntegerTy(32)) {
983    CReg = X86::ECX;
984    RC = &X86::GR32RegClass;
985    switch (I->getOpcode()) {
986    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
987    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
988    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
989    default: return false;
990    }
991  } else if (I->getType()->isIntegerTy(64)) {
992    CReg = X86::RCX;
993    RC = &X86::GR64RegClass;
994    switch (I->getOpcode()) {
995    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
996    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
997    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
998    default: return false;
999    }
1000  } else {
1001    return false;
1002  }
1003
1004  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
1005  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
1006    return false;
1007
1008  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1009  if (Op0Reg == 0) return false;
1010
1011  // Fold a constant shift amount directly into the instruction, e.g. shl(x, 3).
1012  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
1013    unsigned ResultReg = createResultReg(RC);
1014    BuildMI(MBB, DL, TII.get(OpImm),
1015            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
1016    UpdateValueMap(I, ResultReg);
1017    return true;
1018  }
1019
1020  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1021  if (Op1Reg == 0) return false;
1022  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
1023
1024  // The shift instruction uses X86::CL. If we defined a super-register
1025  // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
1026  // we're doing here.
1027  if (CReg != X86::CL)
1028    BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
1029      .addReg(CReg).addImm(X86::SUBREG_8BIT);
1030
1031  unsigned ResultReg = createResultReg(RC);
1032  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
1033  UpdateValueMap(I, ResultReg);
1034  return true;
1035}
1036
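/// X86SelectSelect - Select and emit code for a 'select' instruction by
/// testing the condition register and using a conditional move to pick between
/// the two operands.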
1037bool X86FastISel::X86SelectSelect(Instruction *I) {
1038  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
1039  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
1040    return false;
1041
1042  unsigned Opc = 0;
1043  const TargetRegisterClass *RC = NULL;
1044  if (VT.getSimpleVT() == MVT::i16) {
1045    Opc = X86::CMOVE16rr;
1046    RC = &X86::GR16RegClass;
1047  } else if (VT.getSimpleVT() == MVT::i32) {
1048    Opc = X86::CMOVE32rr;
1049    RC = &X86::GR32RegClass;
1050  } else if (VT.getSimpleVT() == MVT::i64) {
1051    Opc = X86::CMOVE64rr;
1052    RC = &X86::GR64RegClass;
1053  } else {
1054    return false;
1055  }
1056
1057  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1058  if (Op0Reg == 0) return false;
1059  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1060  if (Op1Reg == 0) return false;
1061  unsigned Op2Reg = getRegForValue(I->getOperand(2));
1062  if (Op2Reg == 0) return false;
1063
1064  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
1065  unsigned ResultReg = createResultReg(RC);
1066  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
1067  UpdateValueMap(I, ResultReg);
1068  return true;
1069}
1070
1071bool X86FastISel::X86SelectFPExt(Instruction *I) {
1072  // fpext from float to double.
1073  if (Subtarget->hasSSE2() &&
1074      I->getType()->isDoubleTy()) {
1075    Value *V = I->getOperand(0);
1076    if (V->getType()->isFloatTy()) {
1077      unsigned OpReg = getRegForValue(V);
1078      if (OpReg == 0) return false;
1079      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
1080      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
1081      UpdateValueMap(I, ResultReg);
1082      return true;
1083    }
1084  }
1085
1086  return false;
1087}
1088
1089bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
1090  if (Subtarget->hasSSE2()) {
1091    if (I->getType()->isFloatTy()) {
1092      Value *V = I->getOperand(0);
1093      if (V->getType()->isDoubleTy()) {
1094        unsigned OpReg = getRegForValue(V);
1095        if (OpReg == 0) return false;
1096        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
1097        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
1098        UpdateValueMap(I, ResultReg);
1099        return true;
1100      }
1101    }
1102  }
1103
1104  return false;
1105}
1106
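/// X86SelectTrunc - Select and emit code for truncation to i8/i1 on x86-32,
/// where only the ABCD registers have an addressable low 8-bit subregister.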
1107bool X86FastISel::X86SelectTrunc(Instruction *I) {
1108  if (Subtarget->is64Bit())
1109    // All other cases should be handled by the tblgen generated code.
1110    return false;
1111  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1112  EVT DstVT = TLI.getValueType(I->getType());
1113
1114  // This code only handles truncation to byte right now.
1115  if (DstVT != MVT::i8 && DstVT != MVT::i1)
1116    // All other cases should be handled by the tblgen generated code.
1117    return false;
1118  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
1119    // All other cases should be handled by the tblgen generated code.
1120    return false;
1121
1122  unsigned InputReg = getRegForValue(I->getOperand(0));
1123  if (!InputReg)
1124    // Unhandled operand.  Halt "fast" selection and bail.
1125    return false;
1126
1127  // First issue a copy to GR16_ABCD or GR32_ABCD.
1128  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
1129  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
1130    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
1131  unsigned CopyReg = createResultReg(CopyRC);
1132  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
1133
1134  // Then issue an extract_subreg.
1135  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
1136                                                  CopyReg, X86::SUBREG_8BIT);
1137  if (!ResultReg)
1138    return false;
1139
1140  UpdateValueMap(I, ResultReg);
1141  return true;
1142}
1143
1144bool X86FastISel::X86SelectExtractValue(Instruction *I) {
1145  ExtractValueInst *EI = cast<ExtractValueInst>(I);
1146  Value *Agg = EI->getAggregateOperand();
1147
1148  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
1149    switch (CI->getIntrinsicID()) {
1150    default: break;
1151    case Intrinsic::sadd_with_overflow:
1152    case Intrinsic::uadd_with_overflow:
1153      // Cheat a little. We know that the registers for "add" and "seto" are
1154      // allocated sequentially. However, we only keep track of the register
1155      // for "add" in the value map. Use extractvalue's index to get the
1156      // correct register for "seto".
1157      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
1158      return true;
1159    }
1160  }
1161
1162  return false;
1163}
1164
1165bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
1166  // FIXME: Handle more intrinsics.
1167  switch (I.getIntrinsicID()) {
1168  default: return false;
1169  case Intrinsic::dbg_declare: {
1170    DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
1171    X86AddressMode AM;
1172    assert(DI->getAddress() && "Null address should be checked earlier!");
1173    if (!X86SelectAddress(DI->getAddress(), AM))
1174      return false;
1175    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
1176    // FIXME: may need to add RegState::Debug to any registers produced,
1177    // although ESP/EBP should be the only ones at the moment.
1178    addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
1179                                        addMetadata(DI->getVariable());
1180    return true;
1181  }
1182  case Intrinsic::trap: {
1183    BuildMI(MBB, DL, TII.get(X86::TRAP));
1184    return true;
1185  }
1186  case Intrinsic::sadd_with_overflow:
1187  case Intrinsic::uadd_with_overflow: {
1188    // Replace "add with overflow" intrinsics with an "add" instruction followed
1189    // by a seto/setc instruction. Later on, when the "extractvalue"
1190    // instructions are encountered, we use the fact that two registers were
1191    // created sequentially to get the correct registers for the "sum" and the
1192    // "overflow bit".
1193    const Function *Callee = I.getCalledFunction();
1194    const Type *RetTy =
1195      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
1196
1197    EVT VT;
1198    if (!isTypeLegal(RetTy, VT))
1199      return false;
1200
1201    Value *Op1 = I.getOperand(1);
1202    Value *Op2 = I.getOperand(2);
1203    unsigned Reg1 = getRegForValue(Op1);
1204    unsigned Reg2 = getRegForValue(Op2);
1205
1206    if (Reg1 == 0 || Reg2 == 0)
1207      // FIXME: Handle values *not* in registers.
1208      return false;
1209
1210    unsigned OpC = 0;
1211    if (VT == MVT::i32)
1212      OpC = X86::ADD32rr;
1213    else if (VT == MVT::i64)
1214      OpC = X86::ADD64rr;
1215    else
1216      return false;
1217
1218    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1219    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
1220    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
1221
1222    // If the add with overflow is an intra-block value then we just want to
1223    // create temporaries for it like normal.  If it is a cross-block value then
1224    // UpdateValueMap will return the cross-block register used.  Since we
1225    // *really* want the value to be live in the register pair known by
1226    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
1227    // the cross block case.  In the non-cross-block case, we should just make
1228    // another register for the value.
1229    if (DestReg1 != ResultReg)
1230      ResultReg = DestReg1+1;
1231    else
1232      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
1233
1234    unsigned Opc = X86::SETBr;
1235    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
1236      Opc = X86::SETOr;
1237    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
1238    return true;
1239  }
1240  }
1241}
1242
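/// X86SelectCall - Select and emit code for call instructions: lower the
/// arguments, emit the call sequence, and copy out any return value.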
1243bool X86FastISel::X86SelectCall(Instruction *I) {
1244  CallInst *CI = cast<CallInst>(I);
1245  Value *Callee = I->getOperand(0);
1246
1247  // Can't handle inline asm yet.
1248  if (isa<InlineAsm>(Callee))
1249    return false;
1250
1251  // Handle intrinsic calls.
1252  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
1253    return X86VisitIntrinsicCall(*II);
1254
1255  // Handle only C and fastcc calling conventions for now.
1256  CallSite CS(CI);
1257  CallingConv::ID CC = CS.getCallingConv();
1258  if (CC != CallingConv::C &&
1259      CC != CallingConv::Fast &&
1260      CC != CallingConv::X86_FastCall)
1261    return false;
1262
1263  // fastcc with -tailcallopt is intended to provide a guaranteed
1264  // tail call optimization. FastISel doesn't know how to do that.
1265  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
1266    return false;
1267
1268  // Let SDISel handle vararg functions.
1269  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1270  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1271  if (FTy->isVarArg())
1272    return false;
1273
1274  // Handle *simple* calls for now.
1275  const Type *RetTy = CS.getType();
1276  EVT RetVT;
1277  if (RetTy->isVoidTy())
1278    RetVT = MVT::isVoid;
1279  else if (!isTypeLegal(RetTy, RetVT, true))
1280    return false;
1281
1282  // Materialize callee address in a register. FIXME: GV address can be
1283  // handled with a CALLpcrel32 instead.
1284  X86AddressMode CalleeAM;
1285  if (!X86SelectCallAddress(Callee, CalleeAM))
1286    return false;
1287  unsigned CalleeOp = 0;
1288  GlobalValue *GV = 0;
1289  if (CalleeAM.GV != 0) {
1290    GV = CalleeAM.GV;
1291  } else if (CalleeAM.Base.Reg != 0) {
1292    CalleeOp = CalleeAM.Base.Reg;
1293  } else
1294    return false;
1295
1296  // Allow calls which produce i1 results.
1297  bool AndToI1 = false;
1298  if (RetVT == MVT::i1) {
1299    RetVT = MVT::i8;
1300    AndToI1 = true;
1301  }
1302
1303  // Deal with call operands first.
1304  SmallVector<Value*, 8> ArgVals;
1305  SmallVector<unsigned, 8> Args;
1306  SmallVector<EVT, 8> ArgVTs;
1307  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1308  Args.reserve(CS.arg_size());
1309  ArgVals.reserve(CS.arg_size());
1310  ArgVTs.reserve(CS.arg_size());
1311  ArgFlags.reserve(CS.arg_size());
1312  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1313       i != e; ++i) {
1314    unsigned Arg = getRegForValue(*i);
1315    if (Arg == 0)
1316      return false;
1317    ISD::ArgFlagsTy Flags;
1318    unsigned AttrInd = i - CS.arg_begin() + 1;
1319    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1320      Flags.setSExt();
1321    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1322      Flags.setZExt();
1323
1324    // FIXME: Only handle *easy* calls for now.
1325    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
1326        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
1327        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
1328        CS.paramHasAttr(AttrInd, Attribute::ByVal))
1329      return false;
1330
1331    const Type *ArgTy = (*i)->getType();
1332    EVT ArgVT;
1333    if (!isTypeLegal(ArgTy, ArgVT))
1334      return false;
1335    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1336    Flags.setOrigAlign(OriginalAlignment);
1337
1338    Args.push_back(Arg);
1339    ArgVals.push_back(*i);
1340    ArgVTs.push_back(ArgVT);
1341    ArgFlags.push_back(Flags);
1342  }
1343
1344  // Analyze operands of the call, assigning locations to each operand.
1345  SmallVector<CCValAssign, 16> ArgLocs;
1346  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
1347  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
1348
1349  // Get a count of how many bytes are to be pushed on the stack.
1350  unsigned NumBytes = CCInfo.getNextStackOffset();
1351
1352  // Issue CALLSEQ_START
1353  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
1354  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
1355
1356  // Process arguments: walk the register/memloc assignments, inserting
1357  // copies / loads.
1358  SmallVector<unsigned, 4> RegArgs;
1359  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1360    CCValAssign &VA = ArgLocs[i];
1361    unsigned Arg = Args[VA.getValNo()];
1362    EVT ArgVT = ArgVTs[VA.getValNo()];
1363
1364    // Promote the value if needed.
1365    switch (VA.getLocInfo()) {
1366    default: llvm_unreachable("Unknown loc info!");
1367    case CCValAssign::Full: break;
1368    case CCValAssign::SExt: {
1369      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1370                                       Arg, ArgVT, Arg);
1371      assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
1372      Emitted = true;
1373      ArgVT = VA.getLocVT();
1374      break;
1375    }
1376    case CCValAssign::ZExt: {
1377      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1378                                       Arg, ArgVT, Arg);
1379      assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
1380      Emitted = true;
1381      ArgVT = VA.getLocVT();
1382      break;
1383    }
1384    case CCValAssign::AExt: {
1385      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
1386                                       Arg, ArgVT, Arg);
1387      if (!Emitted)
1388        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1389                                    Arg, ArgVT, Arg);
1390      if (!Emitted)
1391        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1392                                    Arg, ArgVT, Arg);
1393
1394      assert(Emitted && "Failed to emit an aext!"); Emitted=Emitted;
1395      ArgVT = VA.getLocVT();
1396      break;
1397    }
1398    case CCValAssign::BCvt: {
1399      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
1400                               ISD::BIT_CONVERT, Arg);
1401      assert(BC != 0 && "Failed to emit a bitcast!");
1402      Arg = BC;
1403      ArgVT = VA.getLocVT();
1404      break;
1405    }
1406    }
1407
1408    if (VA.isRegLoc()) {
1409      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
1410      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
1411                                      Arg, RC, RC);
1412      assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
1413      Emitted = true;
1414      RegArgs.push_back(VA.getLocReg());
1415    } else {
1416      unsigned LocMemOffset = VA.getLocMemOffset();
1417      X86AddressMode AM;
1418      AM.Base.Reg = StackPtr;
1419      AM.Disp = LocMemOffset;
1420      Value *ArgVal = ArgVals[VA.getValNo()];
1421
1422      // If this is a really simple value, emit this with the Value* version of
1423      // X86FastEmitStore.  If it isn't simple, we don't want to do this, as it
1424      // can cause us to reevaluate the argument.
1425      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
1426        X86FastEmitStore(ArgVT, ArgVal, AM);
1427      else
1428        X86FastEmitStore(ArgVT, Arg, AM);
1429    }
1430  }
1431
1432  // ELF / PIC requires the GOT pointer to be in the EBX register before
1433  // making function calls via the PLT.
1434  if (Subtarget->isPICStyleGOT()) {
1435    TargetRegisterClass *RC = X86::GR32RegisterClass;
1436    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
1437    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
1438    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
1439    Emitted = true;
1440  }
1441
1442  // Issue the call.
1443  MachineInstrBuilder MIB;
1444  if (CalleeOp) {
1445    // Register-indirect call.
1446    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
1447    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);
1448
1449  } else {
1450    // Direct call.
1451    assert(GV && "Not a direct call");
1452    unsigned CallOpc =
1453      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
1454
1455    // See if we need any target-specific flags on the GV operand.
1456    unsigned char OpFlags = 0;
1457
1458    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
1459    // external symbols must go through the PLT in PIC mode.  If the symbol
1460    // has hidden or protected visibility, or if it is static or local, then
1461    // we don't need to use the PLT - we can directly call it.
1462    if (Subtarget->isTargetELF() &&
1463        TM.getRelocationModel() == Reloc::PIC_ &&
1464        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
1465      OpFlags = X86II::MO_PLT;
1466    } else if (Subtarget->isPICStyleStubAny() &&
1467               (GV->isDeclaration() || GV->isWeakForLinker()) &&
1468               Subtarget->getDarwinVers() < 9) {
1469      // PC-relative references to external symbols should go through $stub,
1470      // unless we're building with the Leopard linker or later, which
1471      // automatically synthesizes these stubs.
1472      OpFlags = X86II::MO_DARWIN_STUB;
1473    }
1474
1475
1476    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
1477  }
1478
1479  // Add an implicit use GOT pointer in EBX.
1480  if (Subtarget->isPICStyleGOT())
1481    MIB.addReg(X86::EBX);
1482
1483  // Add implicit physical register uses to the call.
1484  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1485    MIB.addReg(RegArgs[i]);
1486
1487  // Issue CALLSEQ_END
1488  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
1489  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
1490
1491  // Now handle call return value (if any).
1492  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
1493    SmallVector<CCValAssign, 16> RVLocs;
1494    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
1495    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
1496
1497    // Copy all of the result registers out of their specified physreg.
1498    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
1499    EVT CopyVT = RVLocs[0].getValVT();
1500    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1501    TargetRegisterClass *SrcRC = DstRC;
1502
1503    // If this is a call to a function that returns an fp value on the x87 fp
1504    // stack, but where we prefer to use the value in xmm registers, copy it
1505    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
1506    if ((RVLocs[0].getLocReg() == X86::ST0 ||
1507         RVLocs[0].getLocReg() == X86::ST1) &&
1508        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
1509      CopyVT = MVT::f80;
1510      SrcRC = X86::RSTRegisterClass;
1511      DstRC = X86::RFP80RegisterClass;
1512    }
1513
1514    unsigned ResultReg = createResultReg(DstRC);
1515    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
1516                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
1517    assert(Emitted && "Failed to emit a copy instruction!");
1518    (void)Emitted; // Avoid 'unused variable' warnings when asserts are off.
1519    if (CopyVT != RVLocs[0].getValVT()) {
1520      // Round the F80 value to the right size, which also moves it to the
1521      // appropriate xmm register. This is accomplished by storing the F80
1522      // value in memory and then loading it back. Ewww...
1523      EVT ResVT = RVLocs[0].getValVT();
1524      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
1525      unsigned MemSize = ResVT.getSizeInBits()/8;
1526      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
1527      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
1528      DstRC = ResVT == MVT::f32
1529        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
1530      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
1531      ResultReg = createResultReg(DstRC);
1532      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
1533    }
1534
1535    if (AndToI1) {
1536      // Mask out all but the lowest bit for calls that produce an i1 result.
1537      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
1538      BuildMI(MBB, DL,
1539              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
1540      ResultReg = AndResult;
1541    }
1542
1543    UpdateValueMap(I, ResultReg);
1544  }
1545
1546  return true;
1547}
1548
1549
1550bool
1551X86FastISel::TargetSelectInstruction(Instruction *I)  {
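  // Target-specific instruction selection. Returning false means fast-isel
  // could not handle this instruction and the caller will fall back to the
  // default selection path.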
1552  switch (I->getOpcode()) {
1553  default: break;
1554  case Instruction::Load:
1555    return X86SelectLoad(I);
1556  case Instruction::Store:
1557    return X86SelectStore(I);
1558  case Instruction::ICmp:
1559  case Instruction::FCmp:
1560    return X86SelectCmp(I);
1561  case Instruction::ZExt:
1562    return X86SelectZExt(I);
1563  case Instruction::Br:
1564    return X86SelectBranch(I);
1565  case Instruction::Call:
1566    return X86SelectCall(I);
1567  case Instruction::LShr:
1568  case Instruction::AShr:
1569  case Instruction::Shl:
1570    return X86SelectShift(I);
1571  case Instruction::Select:
1572    return X86SelectSelect(I);
1573  case Instruction::Trunc:
1574    return X86SelectTrunc(I);
1575  case Instruction::FPExt:
1576    return X86SelectFPExt(I);
1577  case Instruction::FPTrunc:
1578    return X86SelectFPTrunc(I);
1579  case Instruction::ExtractValue:
1580    return X86SelectExtractValue(I);
1581  case Instruction::IntToPtr: // Deliberate fall-through.
1582  case Instruction::PtrToInt: {
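    // Pointer <-> integer casts are free when the source and destination are
    // the same size; otherwise treat them as a zero-extension or truncation.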
1583    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1584    EVT DstVT = TLI.getValueType(I->getType());
1585    if (DstVT.bitsGT(SrcVT))
1586      return X86SelectZExt(I);
1587    if (DstVT.bitsLT(SrcVT))
1588      return X86SelectTrunc(I);
1589    unsigned Reg = getRegForValue(I->getOperand(0));
1590    if (Reg == 0) return false;
1591    UpdateValueMap(I, Reg);
1592    return true;
1593  }
1594  }
1595
1596  return false;
1597}
1598
1599unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
1600  EVT VT;
1601  if (!isTypeLegal(C->getType(), VT))
1602    return 0;
1603
1604  // Get opcode and regclass of the output for the given load instruction.
1605  unsigned Opc = 0;
1606  const TargetRegisterClass *RC = NULL;
1607  switch (VT.getSimpleVT().SimpleTy) {
1608  default: return 0;
1609  case MVT::i8:
1610    Opc = X86::MOV8rm;
1611    RC  = X86::GR8RegisterClass;
1612    break;
1613  case MVT::i16:
1614    Opc = X86::MOV16rm;
1615    RC  = X86::GR16RegisterClass;
1616    break;
1617  case MVT::i32:
1618    Opc = X86::MOV32rm;
1619    RC  = X86::GR32RegisterClass;
1620    break;
1621  case MVT::i64:
1622    // Must be in x86-64 mode.
1623    Opc = X86::MOV64rm;
1624    RC  = X86::GR64RegisterClass;
1625    break;
1626  case MVT::f32:
1627    if (Subtarget->hasSSE1()) {
1628      Opc = X86::MOVSSrm;
1629      RC  = X86::FR32RegisterClass;
1630    } else {
1631      Opc = X86::LD_Fp32m;
1632      RC  = X86::RFP32RegisterClass;
1633    }
1634    break;
1635  case MVT::f64:
1636    if (Subtarget->hasSSE2()) {
1637      Opc = X86::MOVSDrm;
1638      RC  = X86::FR64RegisterClass;
1639    } else {
1640      Opc = X86::LD_Fp64m;
1641      RC  = X86::RFP64RegisterClass;
1642    }
1643    break;
1644  case MVT::f80:
1645    // No f80 support yet.
1646    return false;
1647  }
1648
1649  // Materialize addresses with LEA instructions.
1650  if (isa<GlobalValue>(C)) {
1651    X86AddressMode AM;
1652    if (X86SelectAddress(C, AM)) {
1653      if (TLI.getPointerTy() == MVT::i32)
1654        Opc = X86::LEA32r;
1655      else
1656        Opc = X86::LEA64r;
1657      unsigned ResultReg = createResultReg(RC);
1658      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1659      return ResultReg;
1660    }
1661    return 0;
1662  }
1663
1664  // MachineConstantPool wants an explicit alignment.
1665  unsigned Align = TD.getPrefTypeAlignment(C->getType());
1666  if (Align == 0) {
1667    // Alignment of vector types.  FIXME!
1668    Align = TD.getTypeAllocSize(C->getType());
1669  }
1670
1671  // x86-32 PIC requires a PIC base register for constant pools.
1672  unsigned PICBase = 0;
1673  unsigned char OpFlag = 0;
1674  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
1675    OpFlag = X86II::MO_PIC_BASE_OFFSET;
1676    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
1677  } else if (Subtarget->isPICStyleGOT()) {
1678    OpFlag = X86II::MO_GOTOFF;
1679    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
1680  } else if (Subtarget->isPICStyleRIPRel() &&
1681             TM.getCodeModel() == CodeModel::Small) {
1682    PICBase = X86::RIP;
1683  }
1684
1685  // Create the load from the constant pool.
1686  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
1687  unsigned ResultReg = createResultReg(RC);
1688  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
1689                           MCPOffset, PICBase, OpFlag);
1690
1691  return ResultReg;
1692}
1693
1694unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
1695  // Fail on dynamic allocas. At this point, getRegForValue has already
1696  // checked its CSE maps, so if we're here trying to handle a dynamic
1697  // alloca, we're not going to succeed. X86SelectAddress has a
1698  // check for dynamic allocas, because it's called directly from
1699  // various places, but TargetMaterializeAlloca also needs a check
1700  // in order to avoid recursion between getRegForValue,
1701  // X86SelectAddress, and TargetMaterializeAlloca.
1702  if (!StaticAllocaMap.count(C))
1703    return 0;
1704
1705  X86AddressMode AM;
1706  if (!X86SelectAddress(C, AM))
1707    return 0;
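  // Materialize the address of the static alloca with an LEA into a register
  // of pointer width.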
1708  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
1709  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
1710  unsigned ResultReg = createResultReg(RC);
1711  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1712  return ResultReg;
1713}
1714
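// Factory function that constructs the X86-specific FastISel object. Called
// when fast instruction selection is enabled for this target.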
1715namespace llvm {
1716  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
1717                        MachineModuleInfo *mmi,
1718                        DwarfWriter *dw,
1719                        DenseMap<const Value *, unsigned> &vm,
1720                        DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
1721                        DenseMap<const AllocaInst *, int> &am
1722#ifndef NDEBUG
1723                        , SmallSet<Instruction*, 8> &cil
1724#endif
1725                        ) {
1726    return new X86FastISel(mf, mmi, dw, vm, bm, am
1727#ifndef NDEBUG
1728                           , cil
1729#endif
1730                           );
1731  }
1732}
1733