X86FastISel.cpp revision 0bc25f40402f48ba42fc45403f635b20d90fabb3
1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the X86-specific support for the FastISel class. Much
11// of the target-specific code is generated by tablegen in the file
12// X86GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86RegisterInfo.h"
19#include "X86Subtarget.h"
20#include "X86TargetMachine.h"
21#include "llvm/CallingConv.h"
22#include "llvm/DerivedTypes.h"
23#include "llvm/GlobalVariable.h"
24#include "llvm/Instructions.h"
25#include "llvm/IntrinsicInst.h"
26#include "llvm/CodeGen/FastISel.h"
27#include "llvm/CodeGen/FunctionLoweringInfo.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Support/CallSite.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/GetElementPtrTypeIterator.h"
34#include "llvm/Target/TargetOptions.h"
35using namespace llvm;
36
37namespace {
38
39class X86FastISel : public FastISel {
40  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
41  /// make the right decision when generating code for different targets.
42  const X86Subtarget *Subtarget;
43
44  /// StackPtr - Register used as the stack pointer.
45  ///
46  unsigned StackPtr;
47
48  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE and x87
49  /// floating point ops.
50  /// When SSE is available, use it for f32 operations.
51  /// When SSE2 is available, use it for f64 operations.
52  bool X86ScalarSSEf64;
53  bool X86ScalarSSEf32;
54
55public:
56  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
57    Subtarget = &TM.getSubtarget<X86Subtarget>();
58    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
59    X86ScalarSSEf64 = Subtarget->hasSSE2();
60    X86ScalarSSEf32 = Subtarget->hasSSE1();
61  }
62
63  virtual bool TargetSelectInstruction(const Instruction *I);
64
65#include "X86GenFastISel.inc"
66
67private:
68  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
69
70  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
71
72  bool X86FastEmitStore(EVT VT, const Value *Val,
73                        const X86AddressMode &AM);
74  bool X86FastEmitStore(EVT VT, unsigned Val,
75                        const X86AddressMode &AM);
76
77  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
78                         unsigned &ResultReg);
79
80  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
81  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
82
83  bool X86SelectLoad(const Instruction *I);
84
85  bool X86SelectStore(const Instruction *I);
86
87  bool X86SelectCmp(const Instruction *I);
88
89  bool X86SelectZExt(const Instruction *I);
90
91  bool X86SelectBranch(const Instruction *I);
92
93  bool X86SelectShift(const Instruction *I);
94
95  bool X86SelectSelect(const Instruction *I);
96
97  bool X86SelectTrunc(const Instruction *I);
98
99  bool X86SelectFPExt(const Instruction *I);
100  bool X86SelectFPTrunc(const Instruction *I);
101
102  bool X86SelectExtractValue(const Instruction *I);
103
104  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
105  bool X86SelectCall(const Instruction *I);
106
107  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
108
109  const X86InstrInfo *getInstrInfo() const {
110    return getTargetMachine()->getInstrInfo();
111  }
112  const X86TargetMachine *getTargetMachine() const {
113    return static_cast<const X86TargetMachine *>(&TM);
114  }
115
116  unsigned TargetMaterializeConstant(const Constant *C);
117
118  unsigned TargetMaterializeAlloca(const AllocaInst *C);
119
120  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
121  /// computed in an SSE register, not on the X87 floating point stack.
122  bool isScalarFPTypeInSSEReg(EVT VT) const {
123    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
124      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
125  }
126
127  bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
128};
129
130} // end anonymous namespace.
131
132bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
133  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
134  if (VT == MVT::Other || !VT.isSimple())
135    // Unhandled type. Halt "fast" selection and bail.
136    return false;
137
138  // For now, require SSE/SSE2 for performing floating-point operations,
139  // since x87 requires additional work.
140  if (VT == MVT::f64 && !X86ScalarSSEf64)
141     return false;
142  if (VT == MVT::f32 && !X86ScalarSSEf32)
143     return false;
144  // Similarly, no f80 support yet.
145  if (VT == MVT::f80)
146    return false;
147  // We only handle legal types. For example, on x86-32 the instruction
148  // selector contains all of the 64-bit instructions from x86-64,
149  // under the assumption that i64 won't be used if the target doesn't
150  // support it.
151  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
152}
153
154#include "X86GenCallingConv.inc"
155
156/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
157/// convention.
158CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
159                                           bool isTailCall) {
160  if (Subtarget->is64Bit()) {
161    if (CC == CallingConv::GHC)
162      return CC_X86_64_GHC;
163    else if (Subtarget->isTargetWin64())
164      return CC_X86_Win64_C;
165    else
166      return CC_X86_64_C;
167  }
168
169  if (CC == CallingConv::X86_FastCall)
170    return CC_X86_32_FastCall;
171  else if (CC == CallingConv::X86_ThisCall)
172    return CC_X86_32_ThisCall;
173  else if (CC == CallingConv::Fast)
174    return CC_X86_32_FastCC;
175  else if (CC == CallingConv::GHC)
176    return CC_X86_32_GHC;
177  else
178    return CC_X86_32_C;
179}
180
181/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
182/// The address (either pre-computed or a GlobalAddress) is described by AM.
183/// Return true and the result register by reference if it is possible.
184bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
185                                  unsigned &ResultReg) {
186  // Get opcode and regclass of the output for the given load instruction.
187  unsigned Opc = 0;
188  const TargetRegisterClass *RC = NULL;
189  switch (VT.getSimpleVT().SimpleTy) {
190  default: return false;
191  case MVT::i1:
192  case MVT::i8:
193    Opc = X86::MOV8rm;
194    RC  = X86::GR8RegisterClass;
195    break;
196  case MVT::i16:
197    Opc = X86::MOV16rm;
198    RC  = X86::GR16RegisterClass;
199    break;
200  case MVT::i32:
201    Opc = X86::MOV32rm;
202    RC  = X86::GR32RegisterClass;
203    break;
204  case MVT::i64:
205    // Must be in x86-64 mode.
206    Opc = X86::MOV64rm;
207    RC  = X86::GR64RegisterClass;
208    break;
209  case MVT::f32:
210    if (Subtarget->hasSSE1()) {
211      Opc = X86::MOVSSrm;
212      RC  = X86::FR32RegisterClass;
213    } else {
214      Opc = X86::LD_Fp32m;
215      RC  = X86::RFP32RegisterClass;
216    }
217    break;
218  case MVT::f64:
219    if (Subtarget->hasSSE2()) {
220      Opc = X86::MOVSDrm;
221      RC  = X86::FR64RegisterClass;
222    } else {
223      Opc = X86::LD_Fp64m;
224      RC  = X86::RFP64RegisterClass;
225    }
226    break;
227  case MVT::f80:
228    // No f80 support yet.
229    return false;
230  }
231
232  ResultReg = createResultReg(RC);
233  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
234  return true;
235}
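// Illustrative example (assuming a simple, already-selected address): for
//   %v = load i32* %p
// the switch above picks MOV32rm, and the whole address computation is folded
// into the instruction's memory operand by addFullAddress, so no separate
// address arithmetic is emitted.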
236
237/// X86FastEmitStore - Emit a machine instruction to store a value Val of
238/// type VT. The address (either a pre-computed base pointer plus a
239/// displacement offset, or a GlobalAddress) is described by AM.
240/// Return true if it is possible.
241bool
242X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
243                              const X86AddressMode &AM) {
244  // Get opcode and regclass of the output for the given store instruction.
245  unsigned Opc = 0;
246  switch (VT.getSimpleVT().SimpleTy) {
247  case MVT::f80: // No f80 support yet.
248  default: return false;
249  case MVT::i1: {
250    // Mask out all but lowest bit.
251    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
252    BuildMI(MBB, DL,
253            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
254    Val = AndResult;
255  }
256  // FALLTHROUGH, handling i1 as i8.
257  case MVT::i8:  Opc = X86::MOV8mr;  break;
258  case MVT::i16: Opc = X86::MOV16mr; break;
259  case MVT::i32: Opc = X86::MOV32mr; break;
260  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
261  case MVT::f32:
262    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
263    break;
264  case MVT::f64:
265    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
266    break;
267  }
268
269  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
270  return true;
271}
272
273bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
274                                   const X86AddressMode &AM) {
275  // Handle 'null' like i32/i64 0.
276  if (isa<ConstantPointerNull>(Val))
277    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
278
279  // If this is a store of a simple constant, fold the constant into the store.
280  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
281    unsigned Opc = 0;
282    bool Signed = true;
283    switch (VT.getSimpleVT().SimpleTy) {
284    default: break;
285    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
286    case MVT::i8:  Opc = X86::MOV8mi;  break;
287    case MVT::i16: Opc = X86::MOV16mi; break;
288    case MVT::i32: Opc = X86::MOV32mi; break;
289    case MVT::i64:
290      // Must be a 32-bit sign extended value.
291      if ((int)CI->getSExtValue() == CI->getSExtValue())
292        Opc = X86::MOV64mi32;
293      break;
294    }
295
296    if (Opc) {
297      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
298                             .addImm(Signed ? (uint64_t) CI->getSExtValue() :
299                                              CI->getZExtValue());
300      return true;
301    }
302  }
303
304  unsigned ValReg = getRegForValue(Val);
305  if (ValReg == 0)
306    return false;
307
308  return X86FastEmitStore(VT, ValReg, AM);
309}
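// Illustrative example: for
//   store i32 42, i32* %p
// the constant is folded straight into a MOV32mi above; an i64 constant that
// does not sign-extend from 32 bits instead falls back to materializing the
// value in a register and using the register form of X86FastEmitStore.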
310
311/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
312/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
313/// ISD::SIGN_EXTEND).
314bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
315                                    unsigned Src, EVT SrcVT,
316                                    unsigned &ResultReg) {
317  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
318                           Src, /*TODO: Kill=*/false);
319
320  if (RR != 0) {
321    ResultReg = RR;
322    return true;
323  } else
324    return false;
325}
326
327/// X86SelectAddress - Attempt to fill in an address from the given value.
328///
329bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
330  const User *U = NULL;
331  unsigned Opcode = Instruction::UserOp1;
332  if (const Instruction *I = dyn_cast<Instruction>(V)) {
333    // Don't walk into other basic blocks; it's possible we haven't
334    // visited them yet, so the instructions may not yet be assigned
335    // virtual registers.
336    if (FuncInfo.MBBMap[I->getParent()] != MBB)
337      return false;
338
339    Opcode = I->getOpcode();
340    U = I;
341  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
342    Opcode = C->getOpcode();
343    U = C;
344  }
345
346  if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
347    if (Ty->getAddressSpace() > 255)
348      // Fast instruction selection doesn't support the special
349      // address spaces.
350      return false;
351
352  switch (Opcode) {
353  default: break;
354  case Instruction::BitCast:
355    // Look past bitcasts.
356    return X86SelectAddress(U->getOperand(0), AM);
357
358  case Instruction::IntToPtr:
359    // Look past no-op inttoptrs.
360    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
361      return X86SelectAddress(U->getOperand(0), AM);
362    break;
363
364  case Instruction::PtrToInt:
365    // Look past no-op ptrtoints.
366    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
367      return X86SelectAddress(U->getOperand(0), AM);
368    break;
369
370  case Instruction::Alloca: {
371    // Do static allocas.
372    const AllocaInst *A = cast<AllocaInst>(V);
373    DenseMap<const AllocaInst*, int>::iterator SI =
374      FuncInfo.StaticAllocaMap.find(A);
375    if (SI != FuncInfo.StaticAllocaMap.end()) {
376      AM.BaseType = X86AddressMode::FrameIndexBase;
377      AM.Base.FrameIndex = SI->second;
378      return true;
379    }
380    break;
381  }
382
383  case Instruction::Add: {
384    // Adds of constants are common and easy enough.
385    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
386      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
387      // They have to fit in the 32-bit signed displacement field though.
388      if (isInt<32>(Disp)) {
389        AM.Disp = (uint32_t)Disp;
390        return X86SelectAddress(U->getOperand(0), AM);
391      }
392    }
393    break;
394  }
395
396  case Instruction::GetElementPtr: {
397    X86AddressMode SavedAM = AM;
398
399    // Pattern-match simple GEPs.
400    uint64_t Disp = (int32_t)AM.Disp;
401    unsigned IndexReg = AM.IndexReg;
402    unsigned Scale = AM.Scale;
403    gep_type_iterator GTI = gep_type_begin(U);
404    // Iterate through the indices, folding what we can. Constants can be
405    // folded, and one dynamic index can be handled, if the scale is supported.
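    // Illustrative example of what the loop below can fold:
    //   getelementptr [8 x i32]* %arr, i32 0, i32 %i
    // turns into Scale = 4 (the i32 element size), IndexReg = the vreg for %i,
    // with constant indices accumulated into Disp; a second dynamic index, or
    // an element size other than 1/2/4/8, bails out to unsupported_gep.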
406    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
407         i != e; ++i, ++GTI) {
408      const Value *Op = *i;
409      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
410        const StructLayout *SL = TD.getStructLayout(STy);
411        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
412        Disp += SL->getElementOffset(Idx);
413      } else {
414        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
415        SmallVector<const Value *, 4> Worklist;
416        Worklist.push_back(Op);
417        do {
418          Op = Worklist.pop_back_val();
419          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
420            // Constant-offset addressing.
421            Disp += CI->getSExtValue() * S;
422          } else if (isa<AddOperator>(Op) &&
423                     isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
424            // An add with a constant operand. Fold the constant.
425            ConstantInt *CI =
426              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
427            Disp += CI->getSExtValue() * S;
428            // Add the other operand back to the work list.
429            Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
430          } else if (IndexReg == 0 &&
431                     (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
432                     (S == 1 || S == 2 || S == 4 || S == 8)) {
433            // Scaled-index addressing.
434            Scale = S;
435            IndexReg = getRegForGEPIndex(Op).first;
436            if (IndexReg == 0)
437              return false;
438          } else
439            // Unsupported.
440            goto unsupported_gep;
441        } while (!Worklist.empty());
442      }
443    }
444    // Check for displacement overflow.
445    if (!isInt<32>(Disp))
446      break;
447    // Ok, the GEP indices were covered by constant-offset and scaled-index
448    // addressing. Update the address state and move on to examining the base.
449    AM.IndexReg = IndexReg;
450    AM.Scale = Scale;
451    AM.Disp = (uint32_t)Disp;
452    if (X86SelectAddress(U->getOperand(0), AM))
453      return true;
454
455    // If we couldn't merge the sub value into this addr mode, revert back to
456    // our address and just match the value instead of completely failing.
457    AM = SavedAM;
458    break;
459  unsupported_gep:
460    // Ok, the GEP indices weren't all covered.
461    break;
462  }
463  }
464
465  // Handle constant address.
466  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
467    // Can't handle alternate code models yet.
468    if (TM.getCodeModel() != CodeModel::Small)
469      return false;
470
471    // RIP-relative addresses can't have additional register operands.
472    if (Subtarget->isPICStyleRIPRel() &&
473        (AM.Base.Reg != 0 || AM.IndexReg != 0))
474      return false;
475
476    // Can't handle TLS yet.
477    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
478      if (GVar->isThreadLocal())
479        return false;
480
481    // Okay, we've committed to selecting this global. Set up the basic address.
482    AM.GV = GV;
483
484    // Allow the subtarget to classify the global.
485    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
486
487    // If this reference is relative to the pic base, set it now.
488    if (isGlobalRelativeToPICBase(GVFlags)) {
489      // FIXME: How do we know Base.Reg is free??
490      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
491    }
492
493    // Unless the ABI requires an extra load, return a direct reference to
494    // the global.
495    if (!isGlobalStubReference(GVFlags)) {
496      if (Subtarget->isPICStyleRIPRel()) {
497        // Use rip-relative addressing if we can.  Above we verified that the
498        // base and index registers are unused.
499        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
500        AM.Base.Reg = X86::RIP;
501      }
502      AM.GVOpFlags = GVFlags;
503      return true;
504    }
505
506    // Ok, we need to do a load from a stub.  If we've already loaded from this
507    // stub, reuse the loaded pointer, otherwise emit the load now.
508    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
509    unsigned LoadReg;
510    if (I != LocalValueMap.end() && I->second != 0) {
511      LoadReg = I->second;
512    } else {
513      // Issue load from stub.
514      unsigned Opc = 0;
515      const TargetRegisterClass *RC = NULL;
516      X86AddressMode StubAM;
517      StubAM.Base.Reg = AM.Base.Reg;
518      StubAM.GV = GV;
519      StubAM.GVOpFlags = GVFlags;
520
521      if (TLI.getPointerTy() == MVT::i64) {
522        Opc = X86::MOV64rm;
523        RC  = X86::GR64RegisterClass;
524
525        if (Subtarget->isPICStyleRIPRel())
526          StubAM.Base.Reg = X86::RIP;
527      } else {
528        Opc = X86::MOV32rm;
529        RC  = X86::GR32RegisterClass;
530      }
531
532      LoadReg = createResultReg(RC);
533      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
534
535      // Prevent loading GV stub multiple times in same MBB.
536      LocalValueMap[V] = LoadReg;
537    }
538
539    // Now construct the final address. Note that the Disp, Scale,
540    // and Index values may already be set here.
541    AM.Base.Reg = LoadReg;
542    AM.GV = 0;
543    return true;
544  }
545
546  // If all else fails, try to materialize the value in a register.
547  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
548    if (AM.Base.Reg == 0) {
549      AM.Base.Reg = getRegForValue(V);
550      return AM.Base.Reg != 0;
551    }
552    if (AM.IndexReg == 0) {
553      assert(AM.Scale == 1 && "Scale with no index!");
554      AM.IndexReg = getRegForValue(V);
555      return AM.IndexReg != 0;
556    }
557  }
558
559  return false;
560}
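// For reference, the X86AddressMode filled in above models the full x86 memory
// operand form [Base + Scale*Index + Disp] (with Base either a register or a
// frame index, optionally relative to a GV), so an access like "p + 4*i + 8"
// can often be folded into a single instruction's address rather than being
// computed separately.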
561
562/// X86SelectCallAddress - Attempt to fill in an address from the given value.
563///
564bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
565  const User *U = NULL;
566  unsigned Opcode = Instruction::UserOp1;
567  if (const Instruction *I = dyn_cast<Instruction>(V)) {
568    Opcode = I->getOpcode();
569    U = I;
570  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
571    Opcode = C->getOpcode();
572    U = C;
573  }
574
575  switch (Opcode) {
576  default: break;
577  case Instruction::BitCast:
578    // Look past bitcasts.
579    return X86SelectCallAddress(U->getOperand(0), AM);
580
581  case Instruction::IntToPtr:
582    // Look past no-op inttoptrs.
583    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
584      return X86SelectCallAddress(U->getOperand(0), AM);
585    break;
586
587  case Instruction::PtrToInt:
588    // Look past no-op ptrtoints.
589    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
590      return X86SelectCallAddress(U->getOperand(0), AM);
591    break;
592  }
593
594  // Handle constant address.
595  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
596    // Can't handle alternate code models yet.
597    if (TM.getCodeModel() != CodeModel::Small)
598      return false;
599
600    // RIP-relative addresses can't have additional register operands.
601    if (Subtarget->isPICStyleRIPRel() &&
602        (AM.Base.Reg != 0 || AM.IndexReg != 0))
603      return false;
604
605    // Can't handle TLS or DLLImport.
606    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
607      if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
608        return false;
609
610    // Okay, we've committed to selecting this global. Set up the basic address.
611    AM.GV = GV;
612
613    // No ABI requires an extra load for anything other than DLLImport, which
614    // we rejected above. Return a direct reference to the global.
615    if (Subtarget->isPICStyleRIPRel()) {
616      // Use rip-relative addressing if we can.  Above we verified that the
617      // base and index registers are unused.
618      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
619      AM.Base.Reg = X86::RIP;
620    } else if (Subtarget->isPICStyleStubPIC()) {
621      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
622    } else if (Subtarget->isPICStyleGOT()) {
623      AM.GVOpFlags = X86II::MO_GOTOFF;
624    }
625
626    return true;
627  }
628
629  // If all else fails, try to materialize the value in a register.
630  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
631    if (AM.Base.Reg == 0) {
632      AM.Base.Reg = getRegForValue(V);
633      return AM.Base.Reg != 0;
634    }
635    if (AM.IndexReg == 0) {
636      assert(AM.Scale == 1 && "Scale with no index!");
637      AM.IndexReg = getRegForValue(V);
638      return AM.IndexReg != 0;
639    }
640  }
641
642  return false;
643}
644
645
646/// X86SelectStore - Select and emit code to implement store instructions.
647bool X86FastISel::X86SelectStore(const Instruction *I) {
648  EVT VT;
649  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
650    return false;
651
652  X86AddressMode AM;
653  if (!X86SelectAddress(I->getOperand(1), AM))
654    return false;
655
656  return X86FastEmitStore(VT, I->getOperand(0), AM);
657}
658
659/// X86SelectLoad - Select and emit code to implement load instructions.
660///
661bool X86FastISel::X86SelectLoad(const Instruction *I)  {
662  EVT VT;
663  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
664    return false;
665
666  X86AddressMode AM;
667  if (!X86SelectAddress(I->getOperand(0), AM))
668    return false;
669
670  unsigned ResultReg = 0;
671  if (X86FastEmitLoad(VT, AM, ResultReg)) {
672    UpdateValueMap(I, ResultReg);
673    return true;
674  }
675  return false;
676}
677
678static unsigned X86ChooseCmpOpcode(EVT VT) {
679  switch (VT.getSimpleVT().SimpleTy) {
680  default:       return 0;
681  case MVT::i8:  return X86::CMP8rr;
682  case MVT::i16: return X86::CMP16rr;
683  case MVT::i32: return X86::CMP32rr;
684  case MVT::i64: return X86::CMP64rr;
685  case MVT::f32: return X86::UCOMISSrr;
686  case MVT::f64: return X86::UCOMISDrr;
687  }
688}
689
690/// X86ChooseCmpImmediateOpcode - If we have a comparison whose right-hand
691/// side is the constant RHSC, return an opcode that can fold the immediate
692/// into the compare (e.g. CMP32ri); otherwise return 0.
693static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
694  switch (VT.getSimpleVT().SimpleTy) {
695  // Otherwise, we can't fold the immediate into this comparison.
696  default: return 0;
697  case MVT::i8: return X86::CMP8ri;
698  case MVT::i16: return X86::CMP16ri;
699  case MVT::i32: return X86::CMP32ri;
700  case MVT::i64:
701    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
702    // field.
703    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
704      return X86::CMP64ri32;
705    return 0;
706  }
707}
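// Illustrative example: "icmp eq i32 %x, 4" can fold the 4 into a CMP32ri,
// while an i64 compare against a constant like 0x100000000 (which does not
// sign-extend from 32 bits) gets 0 back from this helper and is forced down
// the register-register path in X86FastEmitCompare.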
708
709bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
710                                     EVT VT) {
711  unsigned Op0Reg = getRegForValue(Op0);
712  if (Op0Reg == 0) return false;
713
714  // Handle 'null' like i32/i64 0.
715  if (isa<ConstantPointerNull>(Op1))
716    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
717
718  // We have two options: compare with register or immediate.  If the RHS of
719  // the compare is an immediate that we can fold into this compare, use
720  // CMPri, otherwise use CMPrr.
721  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
722    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
723      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
724                                          .addImm(Op1C->getSExtValue());
725      return true;
726    }
727  }
728
729  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
730  if (CompareOpc == 0) return false;
731
732  unsigned Op1Reg = getRegForValue(Op1);
733  if (Op1Reg == 0) return false;
734  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
735
736  return true;
737}
738
739bool X86FastISel::X86SelectCmp(const Instruction *I) {
740  const CmpInst *CI = cast<CmpInst>(I);
741
742  EVT VT;
743  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
744    return false;
745
746  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
747  unsigned SetCCOpc;
748  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
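  // Note for the FP cases below: UCOMISS/UCOMISD report their result only in
  // ZF/PF/CF (unordered -> ZF=PF=CF=1, equal -> ZF=1, less-than -> CF=1), so
  // ordered equality needs both SETE and SETNP, and several predicates are
  // handled by swapping the operands rather than by a different condition.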
749  switch (CI->getPredicate()) {
750  case CmpInst::FCMP_OEQ: {
751    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
752      return false;
753
754    unsigned EReg = createResultReg(&X86::GR8RegClass);
755    unsigned NPReg = createResultReg(&X86::GR8RegClass);
756    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
757    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
758    BuildMI(MBB, DL,
759            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
760    UpdateValueMap(I, ResultReg);
761    return true;
762  }
763  case CmpInst::FCMP_UNE: {
764    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
765      return false;
766
767    unsigned NEReg = createResultReg(&X86::GR8RegClass);
768    unsigned PReg = createResultReg(&X86::GR8RegClass);
769    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
770    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
771    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
772    UpdateValueMap(I, ResultReg);
773    return true;
774  }
775  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
776  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
777  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
778  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
779  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
780  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
781  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
782  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
783  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
784  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
785  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
786  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
787
788  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
789  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
790  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
791  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
792  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
793  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
794  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
795  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
796  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
797  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
798  default:
799    return false;
800  }
801
802  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
803  if (SwapArgs)
804    std::swap(Op0, Op1);
805
806  // Emit a compare of Op0/Op1.
807  if (!X86FastEmitCompare(Op0, Op1, VT))
808    return false;
809
810  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
811  UpdateValueMap(I, ResultReg);
812  return true;
813}
814
815bool X86FastISel::X86SelectZExt(const Instruction *I) {
816  // Handle zero-extension from i1 to i8, which is common.
817  if (I->getType()->isIntegerTy(8) &&
818      I->getOperand(0)->getType()->isIntegerTy(1)) {
819    unsigned ResultReg = getRegForValue(I->getOperand(0));
820    if (ResultReg == 0) return false;
821    // Set the high bits to zero.
822    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
823    if (ResultReg == 0) return false;
824    UpdateValueMap(I, ResultReg);
825    return true;
826  }
827
828  return false;
829}
830
831
832bool X86FastISel::X86SelectBranch(const Instruction *I) {
833  // Unconditional branches are selected by tablegen-generated code.
834  // Handle a conditional branch.
835  const BranchInst *BI = cast<BranchInst>(I);
836  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
837  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
838
839  // Fold the common case of a conditional branch with a comparison.
840  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
841    if (CI->hasOneUse()) {
842      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
843
844      // Try to take advantage of fallthrough opportunities.
845      CmpInst::Predicate Predicate = CI->getPredicate();
846      if (MBB->isLayoutSuccessor(TrueMBB)) {
847        std::swap(TrueMBB, FalseMBB);
848        Predicate = CmpInst::getInversePredicate(Predicate);
849      }
850
851      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
852      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
853
854      switch (Predicate) {
855      case CmpInst::FCMP_OEQ:
856        std::swap(TrueMBB, FalseMBB);
857        Predicate = CmpInst::FCMP_UNE;
858        // FALL THROUGH
859      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
860      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
861      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
862      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
863      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
864      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
865      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
866      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
867      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
868      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
869      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
870      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
871      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
872
873      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
874      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
875      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
876      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
877      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
878      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
879      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
880      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
881      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
882      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
883      default:
884        return false;
885      }
886
887      const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
888      if (SwapArgs)
889        std::swap(Op0, Op1);
890
891      // Emit a compare of the LHS and RHS, setting the flags.
892      if (!X86FastEmitCompare(Op0, Op1, VT))
893        return false;
894
895      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
896
897      if (Predicate == CmpInst::FCMP_UNE) {
898        // X86 requires a second branch to handle UNE (and OEQ,
899        // which is mapped to UNE above).
900        BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
901      }
902
903      FastEmitBranch(FalseMBB, DL);
904      MBB->addSuccessor(TrueMBB);
905      return true;
906    }
907  } else if (ExtractValueInst *EI =
908             dyn_cast<ExtractValueInst>(BI->getCondition())) {
909    // Check to see if the branch instruction is from an "arithmetic with
910    // overflow" intrinsic. The main way these intrinsics are used is:
911    //
912    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
913    //   %sum = extractvalue { i32, i1 } %t, 0
914    //   %obit = extractvalue { i32, i1 } %t, 1
915    //   br i1 %obit, label %overflow, label %normal
916    //
917    // The %sum and %obit are converted into an ADD and a SETO/SETB before
918    // reaching the branch. Therefore, we search backwards through the MBB
919    // looking for the SETO/SETB instruction. If an instruction modifies the
920    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
921    // convert the branch into a JO/JB instruction.
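    //
    // A sketch of the sequence this tries to recognize (assuming nothing
    // clobbers EFLAGS in between):
    //   %sum  <- ADD32rr %v1, %v2     ; sets OF and CF
    //   %obit <- SETOr                ; SETBr for uadd.with.overflow
    //   ...
    //   JO_4 %overflow                ; or JB_4
    // instead of re-testing a materialized i1 value.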
922    if (const IntrinsicInst *CI =
923          dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
924      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
925          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
926        const MachineInstr *SetMI = 0;
927        unsigned Reg = getRegForValue(EI);
928
929        for (MachineBasicBlock::const_reverse_iterator
930               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
931          const MachineInstr &MI = *RI;
932
933          if (MI.definesRegister(Reg)) {
934            unsigned Src, Dst, SrcSR, DstSR;
935
936            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
937              Reg = Src;
938              continue;
939            }
940
941            SetMI = &MI;
942            break;
943          }
944
945          const TargetInstrDesc &TID = MI.getDesc();
946          if (TID.hasUnmodeledSideEffects() ||
947              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
948            break;
949        }
950
951        if (SetMI) {
952          unsigned OpCode = SetMI->getOpcode();
953
954          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
955            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
956                                        X86::JO_4 : X86::JB_4))
957              .addMBB(TrueMBB);
958            FastEmitBranch(FalseMBB, DL);
959            MBB->addSuccessor(TrueMBB);
960            return true;
961          }
962        }
963      }
964    }
965  }
966
967  // Otherwise do a clumsy setcc and re-test it.
968  unsigned OpReg = getRegForValue(BI->getCondition());
969  if (OpReg == 0) return false;
970
971  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
972  BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
973  FastEmitBranch(FalseMBB, DL);
974  MBB->addSuccessor(TrueMBB);
975  return true;
976}
977
978bool X86FastISel::X86SelectShift(const Instruction *I) {
979  unsigned CReg = 0, OpReg = 0, OpImm = 0;
980  const TargetRegisterClass *RC = NULL;
981  if (I->getType()->isIntegerTy(8)) {
982    CReg = X86::CL;
983    RC = &X86::GR8RegClass;
984    switch (I->getOpcode()) {
985    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
986    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
987    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
988    default: return false;
989    }
990  } else if (I->getType()->isIntegerTy(16)) {
991    CReg = X86::CX;
992    RC = &X86::GR16RegClass;
993    switch (I->getOpcode()) {
994    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
995    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
996    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
997    default: return false;
998    }
999  } else if (I->getType()->isIntegerTy(32)) {
1000    CReg = X86::ECX;
1001    RC = &X86::GR32RegClass;
1002    switch (I->getOpcode()) {
1003    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
1004    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
1005    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
1006    default: return false;
1007    }
1008  } else if (I->getType()->isIntegerTy(64)) {
1009    CReg = X86::RCX;
1010    RC = &X86::GR64RegClass;
1011    switch (I->getOpcode()) {
1012    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
1013    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
1014    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
1015    default: return false;
1016    }
1017  } else {
1018    return false;
1019  }
1020
1021  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
1022  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
1023    return false;
1024
1025  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1026  if (Op0Reg == 0) return false;
1027
1028  // Fold immediate in shl(x,3).
1029  if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
1030    unsigned ResultReg = createResultReg(RC);
1031    BuildMI(MBB, DL, TII.get(OpImm),
1032            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
1033    UpdateValueMap(I, ResultReg);
1034    return true;
1035  }
1036
1037  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1038  if (Op1Reg == 0) return false;
1039  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
1040
1041  // The shift instruction uses X86::CL. If we defined a super-register
1042  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1043  if (CReg != X86::CL)
1044    BuildMI(MBB, DL, TII.get(TargetOpcode::KILL), X86::CL)
1045      .addReg(CReg, RegState::Kill);
1046
1047  unsigned ResultReg = createResultReg(RC);
1048  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
1049  UpdateValueMap(I, ResultReg);
1050  return true;
1051}
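// Illustrative examples: "shl i32 %x, 3" is selected as SHL32ri with the
// immediate folded, while a variable amount such as "shl i32 %x, %n" first
// copies %n into ECX/CL, since the x86 variable-shift instructions only take
// their count in CL.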
1052
1053bool X86FastISel::X86SelectSelect(const Instruction *I) {
1054  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
1055  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
1056    return false;
1057
1058  unsigned Opc = 0;
1059  const TargetRegisterClass *RC = NULL;
1060  if (VT.getSimpleVT() == MVT::i16) {
1061    Opc = X86::CMOVE16rr;
1062    RC = &X86::GR16RegClass;
1063  } else if (VT.getSimpleVT() == MVT::i32) {
1064    Opc = X86::CMOVE32rr;
1065    RC = &X86::GR32RegClass;
1066  } else if (VT.getSimpleVT() == MVT::i64) {
1067    Opc = X86::CMOVE64rr;
1068    RC = &X86::GR64RegClass;
1069  } else {
1070    return false;
1071  }
1072
1073  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1074  if (Op0Reg == 0) return false;
1075  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1076  if (Op1Reg == 0) return false;
1077  unsigned Op2Reg = getRegForValue(I->getOperand(2));
1078  if (Op2Reg == 0) return false;
1079
1080  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
1081  unsigned ResultReg = createResultReg(RC);
1082  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
1083  UpdateValueMap(I, ResultReg);
1084  return true;
1085}
1086
1087bool X86FastISel::X86SelectFPExt(const Instruction *I) {
1088  // fpext from float to double.
1089  if (Subtarget->hasSSE2() &&
1090      I->getType()->isDoubleTy()) {
1091    const Value *V = I->getOperand(0);
1092    if (V->getType()->isFloatTy()) {
1093      unsigned OpReg = getRegForValue(V);
1094      if (OpReg == 0) return false;
1095      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
1096      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
1097      UpdateValueMap(I, ResultReg);
1098      return true;
1099    }
1100  }
1101
1102  return false;
1103}
1104
1105bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
1106  if (Subtarget->hasSSE2()) {
1107    if (I->getType()->isFloatTy()) {
1108      const Value *V = I->getOperand(0);
1109      if (V->getType()->isDoubleTy()) {
1110        unsigned OpReg = getRegForValue(V);
1111        if (OpReg == 0) return false;
1112        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
1113        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
1114        UpdateValueMap(I, ResultReg);
1115        return true;
1116      }
1117    }
1118  }
1119
1120  return false;
1121}
1122
1123bool X86FastISel::X86SelectTrunc(const Instruction *I) {
1124  if (Subtarget->is64Bit())
1125    // All other cases should be handled by the tblgen generated code.
1126    return false;
1127  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1128  EVT DstVT = TLI.getValueType(I->getType());
1129
1130  // This code only handles truncation to byte right now.
1131  if (DstVT != MVT::i8 && DstVT != MVT::i1)
1132    // All other cases should be handled by the tblgen generated code.
1133    return false;
1134  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
1135    // All other cases should be handled by the tblgen generated code.
1136    return false;
1137
1138  unsigned InputReg = getRegForValue(I->getOperand(0));
1139  if (!InputReg)
1140    // Unhandled operand.  Halt "fast" selection and bail.
1141    return false;
1142
1143  // First issue a copy to GR16_ABCD or GR32_ABCD.
1144  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
1145  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
1146    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
1147  unsigned CopyReg = createResultReg(CopyRC);
1148  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
1149
1150  // Then issue an extract_subreg.
1151  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
1152                                                  CopyReg, /*Kill=*/true,
1153                                                  X86::sub_8bit);
1154  if (!ResultReg)
1155    return false;
1156
1157  UpdateValueMap(I, ResultReg);
1158  return true;
1159}
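// Note: the copy into GR16_ABCD/GR32_ABCD above is what makes the sub_8bit
// extract legal on x86-32, where only AL/BL/CL/DL exist as 8-bit subregisters;
// e.g. "trunc i32 %x to i8" cannot simply take the low byte of ESI or EDI in
// 32-bit mode.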
1160
1161bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
1162  const ExtractValueInst *EI = cast<ExtractValueInst>(I);
1163  const Value *Agg = EI->getAggregateOperand();
1164
1165  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
1166    switch (CI->getIntrinsicID()) {
1167    default: break;
1168    case Intrinsic::sadd_with_overflow:
1169    case Intrinsic::uadd_with_overflow:
1170      // Cheat a little. We know that the registers for "add" and "seto" are
1171      // allocated sequentially. However, we only keep track of the register
1172      // for "add" in the value map. Use extractvalue's index to get the
1173      // correct register for "seto".
1174      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
1175      return true;
1176    }
1177  }
1178
1179  return false;
1180}
1181
1182bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
1183  // FIXME: Handle more intrinsics.
1184  switch (I.getIntrinsicID()) {
1185  default: return false;
1186  case Intrinsic::stackprotector: {
1187    // Emit code to store the stack guard onto the stack.
1188    EVT PtrTy = TLI.getPointerTy();
1189
1190    const Value *Op1 = I.getArgOperand(0); // The guard's value.
1191    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
1192
1193    // Grab the frame index.
1194    X86AddressMode AM;
1195    if (!X86SelectAddress(Slot, AM)) return false;
1196
1197    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
1198
1199    return true;
1200  }
1201  case Intrinsic::objectsize: {
1202    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
1203    const Type *Ty = I.getCalledFunction()->getReturnType();
1204
1205    assert(CI && "Non-constant type in Intrinsic::objectsize?");
1206
1207    EVT VT;
1208    if (!isTypeLegal(Ty, VT))
1209      return false;
1210
1211    unsigned OpC = 0;
1212    if (VT == MVT::i32)
1213      OpC = X86::MOV32ri;
1214    else if (VT == MVT::i64)
1215      OpC = X86::MOV64ri;
1216    else
1217      return false;
1218
1219    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1220    BuildMI(MBB, DL, TII.get(OpC), ResultReg).
1221                                  addImm(CI->isZero() ? -1ULL : 0);
1222    UpdateValueMap(&I, ResultReg);
1223    return true;
1224  }
1225  case Intrinsic::dbg_declare: {
1226    const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
1227    X86AddressMode AM;
1228    assert(DI->getAddress() && "Null address should be checked earlier!");
1229    if (!X86SelectAddress(DI->getAddress(), AM))
1230      return false;
1231    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
1232    // FIXME may need to add RegState::Debug to any registers produced,
1233    // although ESP/EBP should be the only ones at the moment.
1234    addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
1235                                        addMetadata(DI->getVariable());
1236    return true;
1237  }
1238  case Intrinsic::trap: {
1239    BuildMI(MBB, DL, TII.get(X86::TRAP));
1240    return true;
1241  }
1242  case Intrinsic::sadd_with_overflow:
1243  case Intrinsic::uadd_with_overflow: {
1244    // Replace "add with overflow" intrinsics with an "add" instruction followed
1245    // by a seto/setc instruction. Later on, when the "extractvalue"
1246    // instructions are encountered, we use the fact that two registers were
1247    // created sequentially to get the correct registers for the "sum" and the
1248    // "overflow bit".
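    //
    // Roughly, for @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) this emits:
    //   %sum  <- ADD32rr %a, %b
    //   %obit <- SETOr            ; SETBr for the unsigned variant
    // with %obit living in the register immediately after %sum, which is the
    // layout X86SelectExtractValue relies on.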
1249    const Function *Callee = I.getCalledFunction();
1250    const Type *RetTy =
1251      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
1252
1253    EVT VT;
1254    if (!isTypeLegal(RetTy, VT))
1255      return false;
1256
1257    const Value *Op1 = I.getArgOperand(0);
1258    const Value *Op2 = I.getArgOperand(1);
1259    unsigned Reg1 = getRegForValue(Op1);
1260    unsigned Reg2 = getRegForValue(Op2);
1261
1262    if (Reg1 == 0 || Reg2 == 0)
1263      // FIXME: Handle values *not* in registers.
1264      return false;
1265
1266    unsigned OpC = 0;
1267    if (VT == MVT::i32)
1268      OpC = X86::ADD32rr;
1269    else if (VT == MVT::i64)
1270      OpC = X86::ADD64rr;
1271    else
1272      return false;
1273
1274    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1275    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
1276    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
1277
1278    // If the add with overflow is an intra-block value then we just want to
1279    // create temporaries for it like normal.  If it is a cross-block value then
1280    // UpdateValueMap will return the cross-block register used.  Since we
1281    // *really* want the value to be live in the register pair known by
1282    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
1283    // the cross block case.  In the non-cross-block case, we should just make
1284    // another register for the value.
1285    if (DestReg1 != ResultReg)
1286      ResultReg = DestReg1+1;
1287    else
1288      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
1289
1290    unsigned Opc = X86::SETBr;
1291    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
1292      Opc = X86::SETOr;
1293    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
1294    return true;
1295  }
1296  }
1297}
1298
1299bool X86FastISel::X86SelectCall(const Instruction *I) {
1300  const CallInst *CI = cast<CallInst>(I);
1301  const Value *Callee = CI->getCalledValue();
1302
1303  // Can't handle inline asm yet.
1304  if (isa<InlineAsm>(Callee))
1305    return false;
1306
1307  // Handle intrinsic calls.
1308  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
1309    return X86VisitIntrinsicCall(*II);
1310
1311  // Handle only C and fastcc calling conventions for now.
1312  ImmutableCallSite CS(CI);
1313  CallingConv::ID CC = CS.getCallingConv();
1314  if (CC != CallingConv::C &&
1315      CC != CallingConv::Fast &&
1316      CC != CallingConv::X86_FastCall)
1317    return false;
1318
1319  // fastcc with -tailcallopt is intended to provide a guaranteed
1320  // tail call optimization. Fastisel doesn't know how to do that.
1321  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
1322    return false;
1323
1324  // Let SDISel handle vararg functions.
1325  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1326  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1327  if (FTy->isVarArg())
1328    return false;
1329
1330  // Fast-isel doesn't know about callee-pop yet.
1331  if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
1332    return false;
1333
1334  // Handle *simple* calls for now.
1335  const Type *RetTy = CS.getType();
1336  EVT RetVT;
1337  if (RetTy->isVoidTy())
1338    RetVT = MVT::isVoid;
1339  else if (!isTypeLegal(RetTy, RetVT, true))
1340    return false;
1341
1342  // Materialize callee address in a register. FIXME: GV address can be
1343  // handled with a CALLpcrel32 instead.
1344  X86AddressMode CalleeAM;
1345  if (!X86SelectCallAddress(Callee, CalleeAM))
1346    return false;
1347  unsigned CalleeOp = 0;
1348  const GlobalValue *GV = 0;
1349  if (CalleeAM.GV != 0) {
1350    GV = CalleeAM.GV;
1351  } else if (CalleeAM.Base.Reg != 0) {
1352    CalleeOp = CalleeAM.Base.Reg;
1353  } else
1354    return false;
1355
1356  // Allow calls which produce i1 results.
1357  bool AndToI1 = false;
1358  if (RetVT == MVT::i1) {
1359    RetVT = MVT::i8;
1360    AndToI1 = true;
1361  }
1362
1363  // Deal with call operands first.
1364  SmallVector<const Value *, 8> ArgVals;
1365  SmallVector<unsigned, 8> Args;
1366  SmallVector<EVT, 8> ArgVTs;
1367  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1368  Args.reserve(CS.arg_size());
1369  ArgVals.reserve(CS.arg_size());
1370  ArgVTs.reserve(CS.arg_size());
1371  ArgFlags.reserve(CS.arg_size());
1372  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1373       i != e; ++i) {
1374    unsigned Arg = getRegForValue(*i);
1375    if (Arg == 0)
1376      return false;
1377    ISD::ArgFlagsTy Flags;
1378    unsigned AttrInd = i - CS.arg_begin() + 1;
1379    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1380      Flags.setSExt();
1381    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1382      Flags.setZExt();
1383
1384    // FIXME: Only handle *easy* calls for now.
1385    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
1386        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
1387        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
1388        CS.paramHasAttr(AttrInd, Attribute::ByVal))
1389      return false;
1390
1391    const Type *ArgTy = (*i)->getType();
1392    EVT ArgVT;
1393    if (!isTypeLegal(ArgTy, ArgVT))
1394      return false;
1395    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1396    Flags.setOrigAlign(OriginalAlignment);
1397
1398    Args.push_back(Arg);
1399    ArgVals.push_back(*i);
1400    ArgVTs.push_back(ArgVT);
1401    ArgFlags.push_back(Flags);
1402  }
1403
1404  // Analyze operands of the call, assigning locations to each operand.
1405  SmallVector<CCValAssign, 16> ArgLocs;
1406  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
1407
1408  // Allocate shadow area for Win64
1409  if (Subtarget->isTargetWin64()) {
1410    CCInfo.AllocateStack(32, 8);
1411  }
1412
1413  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
1414
1415  // Get a count of how many bytes are to be pushed on the stack.
1416  unsigned NumBytes = CCInfo.getNextStackOffset();
1417
1418  // Issue CALLSEQ_START
1419  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
1420  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
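  // The overall call sequence being built here is, roughly:
  //   ADJCALLSTACKDOWN <NumBytes>
  //   <copies into argument registers / stores to [StackPtr + offset]>
  //   CALL <callee>
  //   ADJCALLSTACKUP <NumBytes>, 0
  //   <copy of the return value out of its physreg, if any>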
1421
1422  // Process the arguments: walk the register/memloc assignments, inserting
1423  // copies / stores.
1424  SmallVector<unsigned, 4> RegArgs;
1425  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1426    CCValAssign &VA = ArgLocs[i];
1427    unsigned Arg = Args[VA.getValNo()];
1428    EVT ArgVT = ArgVTs[VA.getValNo()];
1429
1430    // Promote the value if needed.
1431    switch (VA.getLocInfo()) {
1432    default: llvm_unreachable("Unknown loc info!");
1433    case CCValAssign::Full: break;
1434    case CCValAssign::SExt: {
1435      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1436                                       Arg, ArgVT, Arg);
1437      assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
1438      Emitted = true;
1439      ArgVT = VA.getLocVT();
1440      break;
1441    }
1442    case CCValAssign::ZExt: {
1443      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1444                                       Arg, ArgVT, Arg);
1445      assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
1446      Emitted = true;
1447      ArgVT = VA.getLocVT();
1448      break;
1449    }
1450    case CCValAssign::AExt: {
1451      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
1452                                       Arg, ArgVT, Arg);
1453      if (!Emitted)
1454        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1455                                    Arg, ArgVT, Arg);
1456      if (!Emitted)
1457        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1458                                    Arg, ArgVT, Arg);
1459
1460      assert(Emitted && "Failed to emit an aext!"); Emitted=Emitted;
1461      ArgVT = VA.getLocVT();
1462      break;
1463    }
1464    case CCValAssign::BCvt: {
1465      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
1466                               ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
1467      assert(BC != 0 && "Failed to emit a bitcast!");
1468      Arg = BC;
1469      ArgVT = VA.getLocVT();
1470      break;
1471    }
1472    }
1473
1474    if (VA.isRegLoc()) {
1475      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
1476      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
1477                                      Arg, RC, RC, DL);
1478      assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
1479      Emitted = true;
1480      RegArgs.push_back(VA.getLocReg());
1481    } else {
1482      unsigned LocMemOffset = VA.getLocMemOffset();
1483      X86AddressMode AM;
1484      AM.Base.Reg = StackPtr;
1485      AM.Disp = LocMemOffset;
1486      const Value *ArgVal = ArgVals[VA.getValNo()];
1487
1488      // If this is a really simple value, emit this with the Value* version of
1489      // X86FastEmitStore.  If it isn't simple, we don't want to do this, as it
1490      // can cause us to reevaluate the argument.
1491      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
1492        X86FastEmitStore(ArgVT, ArgVal, AM);
1493      else
1494        X86FastEmitStore(ArgVT, Arg, AM);
1495    }
1496  }
1497
1498  // ELF / PIC code requires the GOT pointer to be in the EBX register before
1499  // any call made through the PLT.
1500  if (Subtarget->isPICStyleGOT()) {
1501    TargetRegisterClass *RC = X86::GR32RegisterClass;
1502    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
1503    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
1504                                    DL);
1505    assert(Emitted && "Failed to emit a copy instruction!");
1506    (void)Emitted; // Silence unused-variable warnings in NDEBUG builds.
1507  }
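  // EBX is also listed as an implicit use on the call instruction below, so the
  // GOT pointer stays live up to the call itself.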
1508
1509  // Issue the call.
1510  MachineInstrBuilder MIB;
1511  if (CalleeOp) {
1512    // Register-indirect call.
1513    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
1514    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);
1515
1516  } else {
1517    // Direct call.
1518    assert(GV && "Not a direct call");
1519    unsigned CallOpc =
1520      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
1521
1522    // See if we need any target-specific flags on the GV operand.
1523    unsigned char OpFlags = 0;
1524
1525    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
1526    // external symbols must go through the PLT in PIC mode.  If the symbol
1527    // has hidden or protected visibility, or if it is static or local, then
1528    // we don't need to use the PLT - we can directly call it.
1529    if (Subtarget->isTargetELF() &&
1530        TM.getRelocationModel() == Reloc::PIC_ &&
1531        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
1532      OpFlags = X86II::MO_PLT;
1533    } else if (Subtarget->isPICStyleStubAny() &&
1534               (GV->isDeclaration() || GV->isWeakForLinker()) &&
1535               Subtarget->getDarwinVers() < 9) {
1536      // PC-relative references to external symbols should go through $stub,
1537      // unless we're building with the Leopard linker or later, which
1538      // automatically synthesizes these stubs.
1539      OpFlags = X86II::MO_DARWIN_STUB;
1540    }
1541
1542
1543    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
1544  }
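  // In assembly terms the two forms above are roughly
  //   call *%reg              ; CALL32r / CALL64r, register-indirect
  //   call foo                ; CALLpcrel32 / CALL64pcrel32, possibly emitted
  //                           ; as foo@PLT or a Darwin $stub reference
  // depending on the operand flags chosen above.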
1545
1546  // Add an implicit use of the GOT pointer in EBX.
1547  if (Subtarget->isPICStyleGOT())
1548    MIB.addReg(X86::EBX);
1549
1550  // Add implicit physical register uses to the call.
1551  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1552    MIB.addReg(RegArgs[i]);
1553
1554  // Issue CALLSEQ_END
1555  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
1556  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
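  // The second immediate on ADJCALLSTACKUP is the number of argument bytes the
  // callee itself pops on return; this path always passes zero, i.e. it assumes
  // caller-cleanup argument passing.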
1557
1558  // Now handle call return value (if any).
1559  SmallVector<unsigned, 4> UsedRegs;
1560  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
1561    SmallVector<CCValAssign, 16> RVLocs;
1562    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
1563    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
1564
1565    // Copy all of the result registers out of their specified physreg.
1566    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
1567    EVT CopyVT = RVLocs[0].getValVT();
1568    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1569    TargetRegisterClass *SrcRC = DstRC;
1570
1571    // If this is a call to a function that returns an fp value on the x87 fp
1572    // stack, but where we prefer to use the value in xmm registers, copy it
1573    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
1574    if ((RVLocs[0].getLocReg() == X86::ST0 ||
1575         RVLocs[0].getLocReg() == X86::ST1) &&
1576        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
1577      CopyVT = MVT::f80;
1578      SrcRC = X86::RSTRegisterClass;
1579      DstRC = X86::RFP80RegisterClass;
1580    }
1581
1582    unsigned ResultReg = createResultReg(DstRC);
1583    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
1584                                    RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
1585    assert(Emitted && "Failed to emit a copy instruction!");
1586    (void)Emitted; // Silence unused-variable warnings in NDEBUG builds.
1587    UsedRegs.push_back(RVLocs[0].getLocReg());
1588
1589    if (CopyVT != RVLocs[0].getValVT()) {
1590      // Round the F80 value to the right size, which also moves it to the
1591      // appropriate xmm register. This is accomplished by storing the F80 value
1592      // in memory and then loading it back. Ewww...
1593      EVT ResVT = RVLocs[0].getValVT();
1594      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
1595      unsigned MemSize = ResVT.getSizeInBits()/8;
1596      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
1597      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
1598      DstRC = ResVT == MVT::f32
1599        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
1600      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
1601      ResultReg = createResultReg(DstRC);
1602      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
1603    }
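    // The round trip above is roughly, for an f32 result,
    //   ST_Fp80m32 <slot>, %reg     ; store the x87 value as a 32-bit float
    //   %xmm = MOVSSrm <slot>       ; reload it into an SSE register
    // with ST_Fp80m64/MOVSDrm used for f64 results.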
1604
1605    if (AndToI1) {
1606      // Mask out all but the lowest bit for calls that produce an i1.
1607      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
1608      BuildMI(MBB, DL,
1609              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
1610      ResultReg = AndResult;
1611    }
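    // The AND above leaves a canonical 0/1 value in an 8-bit register before
    // the result is recorded in the value map.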
1612
1613    UpdateValueMap(I, ResultReg);
1614  }
1615
1616  // Set all unused physreg defs as dead.
1617  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1618
1619  return true;
1620}
1621
1622
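/// TargetSelectInstruction - Attempt to select I using the X86-specific
/// FastISel routines above. Returning false tells the caller that fast
/// selection failed and the instruction should be handled by the regular
/// SelectionDAG path instead.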
1623bool
1624X86FastISel::TargetSelectInstruction(const Instruction *I)  {
1625  switch (I->getOpcode()) {
1626  default: break;
1627  case Instruction::Load:
1628    return X86SelectLoad(I);
1629  case Instruction::Store:
1630    return X86SelectStore(I);
1631  case Instruction::ICmp:
1632  case Instruction::FCmp:
1633    return X86SelectCmp(I);
1634  case Instruction::ZExt:
1635    return X86SelectZExt(I);
1636  case Instruction::Br:
1637    return X86SelectBranch(I);
1638  case Instruction::Call:
1639    return X86SelectCall(I);
1640  case Instruction::LShr:
1641  case Instruction::AShr:
1642  case Instruction::Shl:
1643    return X86SelectShift(I);
1644  case Instruction::Select:
1645    return X86SelectSelect(I);
1646  case Instruction::Trunc:
1647    return X86SelectTrunc(I);
1648  case Instruction::FPExt:
1649    return X86SelectFPExt(I);
1650  case Instruction::FPTrunc:
1651    return X86SelectFPTrunc(I);
1652  case Instruction::ExtractValue:
1653    return X86SelectExtractValue(I);
1654  case Instruction::IntToPtr: // Deliberate fall-through.
1655  case Instruction::PtrToInt: {
1656    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1657    EVT DstVT = TLI.getValueType(I->getType());
1658    if (DstVT.bitsGT(SrcVT))
1659      return X86SelectZExt(I);
1660    if (DstVT.bitsLT(SrcVT))
1661      return X86SelectTrunc(I);
1662    unsigned Reg = getRegForValue(I->getOperand(0));
1663    if (Reg == 0) return false;
1664    UpdateValueMap(I, Reg);
1665    return true;
1666  }
1667  }
1668
1669  return false;
1670}
1671
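/// TargetMaterializeConstant - Materialize the constant C into a new virtual
/// register: an LEA of its address for global values, otherwise a load from
/// the constant pool. Returns 0 if the constant cannot be handled here.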
1672unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
1673  EVT VT;
1674  if (!isTypeLegal(C->getType(), VT))
1675    return 0;
1676
1677  // Get opcode and regclass of the output for the given load instruction.
1678  unsigned Opc = 0;
1679  const TargetRegisterClass *RC = NULL;
1680  switch (VT.getSimpleVT().SimpleTy) {
1681  default: return 0;
1682  case MVT::i8:
1683    Opc = X86::MOV8rm;
1684    RC  = X86::GR8RegisterClass;
1685    break;
1686  case MVT::i16:
1687    Opc = X86::MOV16rm;
1688    RC  = X86::GR16RegisterClass;
1689    break;
1690  case MVT::i32:
1691    Opc = X86::MOV32rm;
1692    RC  = X86::GR32RegisterClass;
1693    break;
1694  case MVT::i64:
1695    // Must be in x86-64 mode.
1696    Opc = X86::MOV64rm;
1697    RC  = X86::GR64RegisterClass;
1698    break;
1699  case MVT::f32:
1700    if (Subtarget->hasSSE1()) {
1701      Opc = X86::MOVSSrm;
1702      RC  = X86::FR32RegisterClass;
1703    } else {
1704      Opc = X86::LD_Fp32m;
1705      RC  = X86::RFP32RegisterClass;
1706    }
1707    break;
1708  case MVT::f64:
1709    if (Subtarget->hasSSE2()) {
1710      Opc = X86::MOVSDrm;
1711      RC  = X86::FR64RegisterClass;
1712    } else {
1713      Opc = X86::LD_Fp64m;
1714      RC  = X86::RFP64RegisterClass;
1715    }
1716    break;
1717  case MVT::f80:
1718    // No f80 support yet.
1719    return 0;
1720  }
1721
1722  // Materialize addresses with LEA instructions.
1723  if (isa<GlobalValue>(C)) {
1724    X86AddressMode AM;
1725    if (X86SelectAddress(C, AM)) {
1726      if (TLI.getPointerTy() == MVT::i32)
1727        Opc = X86::LEA32r;
1728      else
1729        Opc = X86::LEA64r;
1730      unsigned ResultReg = createResultReg(RC);
1731      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1732      return ResultReg;
1733    }
1734    return 0;
1735  }
1736
1737  // MachineConstantPool wants an explicit alignment.
1738  unsigned Align = TD.getPrefTypeAlignment(C->getType());
1739  if (Align == 0) {
1740    // Alignment of vector types.  FIXME!
1741    Align = TD.getTypeAllocSize(C->getType());
1742  }
1743
1744  // x86-32 PIC requires a PIC base register for constant pools.
1745  unsigned PICBase = 0;
1746  unsigned char OpFlag = 0;
1747  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
1748    OpFlag = X86II::MO_PIC_BASE_OFFSET;
1749    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
1750  } else if (Subtarget->isPICStyleGOT()) {
1751    OpFlag = X86II::MO_GOTOFF;
1752    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
1753  } else if (Subtarget->isPICStyleRIPRel() &&
1754             TM.getCodeModel() == CodeModel::Small) {
1755    PICBase = X86::RIP;
1756  }
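  // With these choices the constant-pool load below is addressed roughly as
  //   [picbase + CPI@GOTOFF]    for GOT-style PIC,
  //   [picbase + CPI-piclabel]  for Darwin stub-style PIC, or
  //   [rip + CPI]               for small-code-model x86-64,
  // and as a plain absolute address when no PIC base was set.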
1757
1758  // Create the load from the constant pool.
1759  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
1760  unsigned ResultReg = createResultReg(RC);
1761  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
1762                           MCPOffset, PICBase, OpFlag);
1763
1764  return ResultReg;
1765}
1766
1767unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
1768  // Fail on dynamic allocas. At this point, getRegForValue has already
1769  // checked its CSE maps, so if we're here trying to handle a dynamic
1770  // alloca, we're not going to succeed. X86SelectAddress has a
1771  // check for dynamic allocas, because it's called directly from
1772  // various places, but TargetMaterializeAlloca also needs a check
1773  // in order to avoid recursion between getRegForValue,
1774  // X86SelectAddress, and TargetMaterializeAlloca.
1775  if (!FuncInfo.StaticAllocaMap.count(C))
1776    return 0;
1777
1778  X86AddressMode AM;
1779  if (!X86SelectAddress(C, AM))
1780    return 0;
1781  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
1782  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
1783  unsigned ResultReg = createResultReg(RC);
1784  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1785  return ResultReg;
1786}
1787
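// Factory hooked up by the X86 backend: when fast instruction selection is
// enabled (for example at -O0), the target's createFastISel hook ultimately
// calls this function to construct the X86FastISel instance defined above.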
1788namespace llvm {
1789  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
1790    return new X86FastISel(funcInfo);
1791  }
1792}
1793