X86FastISel.cpp revision 57c3dac0df7ac1b53ae7c0e5d2adc459fc7bd37c
1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the X86-specific support for the FastISel class. Much
11// of the target-specific code is generated by tablegen in the file
12// X86GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86RegisterInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/CallingConv.h"
23#include "llvm/DerivedTypes.h"
24#include "llvm/Instructions.h"
25#include "llvm/CodeGen/FastISel.h"
26#include "llvm/CodeGen/MachineConstantPool.h"
27#include "llvm/CodeGen/MachineFrameInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/Support/CallSite.h"
30#include "llvm/Support/GetElementPtrTypeIterator.h"
31
32using namespace llvm;
33
34class X86FastISel : public FastISel {
35  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
36  /// make the right decision when generating code for different targets.
37  const X86Subtarget *Subtarget;
38
39  /// StackPtr - Register used as the stack pointer.
40  ///
41  unsigned StackPtr;
42
43  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
44  /// floating point ops.
45  /// When SSE is available, use it for f32 operations.
46  /// When SSE2 is available, use it for f64 operations.
47  bool X86ScalarSSEf64;
48  bool X86ScalarSSEf32;
49
50public:
51  explicit X86FastISel(MachineFunction &mf,
52                       MachineModuleInfo *mmi,
53                       DenseMap<const Value *, unsigned> &vm,
54                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
55                       DenseMap<const AllocaInst *, int> &am)
56    : FastISel(mf, mmi, vm, bm, am) {
57    Subtarget = &TM.getSubtarget<X86Subtarget>();
58    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
59    X86ScalarSSEf64 = Subtarget->hasSSE2();
60    X86ScalarSSEf32 = Subtarget->hasSSE1();
61  }
62
63  virtual bool TargetSelectInstruction(Instruction *I);
64
65#include "X86GenFastISel.inc"
66
67private:
68  bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);
69
70  bool X86FastEmitStore(MVT VT, unsigned Val,
71                        const X86AddressMode &AM);
72
73  bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
74                         unsigned &ResultReg);
75
76  bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);
77
78  bool X86SelectLoad(Instruction *I);
79
80  bool X86SelectStore(Instruction *I);
81
82  bool X86SelectCmp(Instruction *I);
83
84  bool X86SelectZExt(Instruction *I);
85
86  bool X86SelectBranch(Instruction *I);
87
88  bool X86SelectShift(Instruction *I);
89
90  bool X86SelectSelect(Instruction *I);
91
92  bool X86SelectTrunc(Instruction *I);
93
94  bool X86SelectFPExt(Instruction *I);
95  bool X86SelectFPTrunc(Instruction *I);
96
97  bool X86SelectCall(Instruction *I);
98
99  CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);
100
101  const X86InstrInfo *getInstrInfo() const {
102    return getTargetMachine()->getInstrInfo();
103  }
104  const X86TargetMachine *getTargetMachine() const {
105    return static_cast<const X86TargetMachine *>(&TM);
106  }
107
108  unsigned TargetMaterializeConstant(Constant *C);
109
110  unsigned TargetMaterializeAlloca(AllocaInst *C);
111
112  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
113  /// computed in an SSE register, not on the X87 floating point stack.
114  bool isScalarFPTypeInSSEReg(MVT VT) const {
115    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
116      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
117  }
118
119  bool isTypeLegal(const Type *Ty, const TargetLowering &TLI, MVT &VT,
120                   bool AllowI1 = false);
121};
122
123bool X86FastISel::isTypeLegal(const Type *Ty, const TargetLowering &TLI,
124                              MVT &VT, bool AllowI1) {
125  VT = MVT::getMVT(Ty, /*HandleUnknown=*/true);
126  if (VT == MVT::Other || !VT.isSimple())
127    // Unhandled type. Halt "fast" selection and bail.
128    return false;
129  if (VT == MVT::iPTR)
130    // Use pointer type.
131    VT = TLI.getPointerTy();
132  // For now, require SSE/SSE2 for performing floating-point operations,
133  // since x87 requires additional work.
134  if (VT == MVT::f64 && !X86ScalarSSEf64)
135     return false;
136  if (VT == MVT::f32 && !X86ScalarSSEf32)
137     return false;
138  // Similarly, no f80 support yet.
139  if (VT == MVT::f80)
140    return false;
141  // We only handle legal types. For example, on x86-32 the instruction
142  // selector contains all of the 64-bit instructions from x86-64,
143  // under the assumption that i64 won't be used if the target doesn't
144  // support it.
145  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
146}
147
148#include "X86GenCallingConv.inc"
149
150/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
151/// convention.
152CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTailCall) {
153  if (Subtarget->is64Bit()) {
154    if (Subtarget->isTargetWin64())
155      return CC_X86_Win64_C;
156    else if (CC == CallingConv::Fast && isTailCall)
157      return CC_X86_64_TailCall;
158    else
159      return CC_X86_64_C;
160  }
161
162  if (CC == CallingConv::X86_FastCall)
163    return CC_X86_32_FastCall;
164  else if (CC == CallingConv::Fast)
165    return CC_X86_32_FastCC;
166  else
167    return CC_X86_32_C;
168}
169
170/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
171/// The address is given by the pre-computed X86AddressMode, AM.
172/// Return true and the result register by reference if it is possible.
173bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
174                                  unsigned &ResultReg) {
175  // Get opcode and regclass of the output for the given load instruction.
176  unsigned Opc = 0;
177  const TargetRegisterClass *RC = NULL;
178  switch (VT.getSimpleVT()) {
179  default: return false;
180  case MVT::i8:
181    Opc = X86::MOV8rm;
182    RC  = X86::GR8RegisterClass;
183    break;
184  case MVT::i16:
185    Opc = X86::MOV16rm;
186    RC  = X86::GR16RegisterClass;
187    break;
188  case MVT::i32:
189    Opc = X86::MOV32rm;
190    RC  = X86::GR32RegisterClass;
191    break;
192  case MVT::i64:
193    // Must be in x86-64 mode.
194    Opc = X86::MOV64rm;
195    RC  = X86::GR64RegisterClass;
196    break;
197  case MVT::f32:
198    if (Subtarget->hasSSE1()) {
199      Opc = X86::MOVSSrm;
200      RC  = X86::FR32RegisterClass;
201    } else {
202      Opc = X86::LD_Fp32m;
203      RC  = X86::RFP32RegisterClass;
204    }
205    break;
206  case MVT::f64:
207    if (Subtarget->hasSSE2()) {
208      Opc = X86::MOVSDrm;
209      RC  = X86::FR64RegisterClass;
210    } else {
211      Opc = X86::LD_Fp64m;
212      RC  = X86::RFP64RegisterClass;
213    }
214    break;
215  case MVT::f80:
216    // No f80 support yet.
217    return false;
218  }
219
220  ResultReg = createResultReg(RC);
221  addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
222  return true;
223}
224
225/// X86FastEmitStore - Emit a machine instruction to store a value Val of
226/// type VT. The address is given by the pre-computed X86AddressMode, AM,
227/// which may consist of a base register, a scaled index register, a
228/// displacement, and/or a GlobalValue. Return true if it is possible.
229bool
230X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
231                              const X86AddressMode &AM) {
232  // Get opcode and regclass for the value being stored.
233  unsigned Opc = 0;
234  const TargetRegisterClass *RC = NULL;
235  switch (VT.getSimpleVT()) {
236  default: return false;
237  case MVT::i8:
238    Opc = X86::MOV8mr;
239    RC  = X86::GR8RegisterClass;
240    break;
241  case MVT::i16:
242    Opc = X86::MOV16mr;
243    RC  = X86::GR16RegisterClass;
244    break;
245  case MVT::i32:
246    Opc = X86::MOV32mr;
247    RC  = X86::GR32RegisterClass;
248    break;
249  case MVT::i64:
250    // Must be in x86-64 mode.
251    Opc = X86::MOV64mr;
252    RC  = X86::GR64RegisterClass;
253    break;
254  case MVT::f32:
255    if (Subtarget->hasSSE1()) {
256      Opc = X86::MOVSSmr;
257      RC  = X86::FR32RegisterClass;
258    } else {
259      Opc = X86::ST_Fp32m;
260      RC  = X86::RFP32RegisterClass;
261    }
262    break;
263  case MVT::f64:
264    if (Subtarget->hasSSE2()) {
265      Opc = X86::MOVSDmr;
266      RC  = X86::FR64RegisterClass;
267    } else {
268      Opc = X86::ST_Fp64m;
269      RC  = X86::RFP64RegisterClass;
270    }
271    break;
272  case MVT::f80:
273    // No f80 support yet.
274    return false;
275  }
276
277  addFullAddress(BuildMI(MBB, TII.get(Opc)), AM).addReg(Val);
278  return true;
279}
280
281/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
282/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
283/// ISD::SIGN_EXTEND).
284bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
285                                    unsigned Src, MVT SrcVT,
286                                    unsigned &ResultReg) {
287  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
288
289  if (RR != 0) {
290    ResultReg = RR;
291    return true;
292  } else
293    return false;
294}
295
296/// X86SelectAddress - Attempt to fill in an address from the given value.
297///
298bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
299  User *U;
300  unsigned Opcode = Instruction::UserOp1;
301  if (Instruction *I = dyn_cast<Instruction>(V)) {
302    Opcode = I->getOpcode();
303    U = I;
304  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
305    Opcode = C->getOpcode();
306    U = C;
307  }
308
309  switch (Opcode) {
310  default: break;
311  case Instruction::BitCast:
312    // Look past bitcasts.
313    return X86SelectAddress(U->getOperand(0), AM, isCall);
314
315  case Instruction::IntToPtr:
316    // Look past no-op inttoptrs.
317    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
318      return X86SelectAddress(U->getOperand(0), AM, isCall);
319    break;
320  case Instruction::PtrToInt:
321    // Look past no-op ptrtoints.
322    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
323      return X86SelectAddress(U->getOperand(0), AM, isCall);
324    break;
325  case Instruction::Alloca: {
326    if (isCall) break;
327    // Do static allocas.
328    const AllocaInst *A = cast<AllocaInst>(V);
329    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
330    if (SI != StaticAllocaMap.end()) {
331      AM.BaseType = X86AddressMode::FrameIndexBase;
332      AM.Base.FrameIndex = SI->second;
333      return true;
334    }
335    break;
336  }
337
338  case Instruction::Add: {
339    if (isCall) break;
340    // Adds of constants are common and easy enough.
341    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
342      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
343      // They have to fit in the 32-bit signed displacement field though.
344      if (isInt32(Disp)) {
345        AM.Disp = (uint32_t)Disp;
346        return X86SelectAddress(U->getOperand(0), AM, isCall);
347      }
348    }
349    break;
350  }
351
352  case Instruction::GetElementPtr: {
353    if (isCall) break;
354    // Pattern-match simple GEPs.
355    uint64_t Disp = (int32_t)AM.Disp;
356    unsigned IndexReg = AM.IndexReg;
357    unsigned Scale = AM.Scale;
358    gep_type_iterator GTI = gep_type_begin(U);
359    // Walk the GEP indices. Constants can be folded, and at most one
360    // dynamic index can be handled, if the scale is supported.
361    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
362         i != e; ++i, ++GTI) {
363      Value *Op = *i;
364      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
365        const StructLayout *SL = TD.getStructLayout(STy);
366        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
367        Disp += SL->getElementOffset(Idx);
368      } else {
369        uint64_t S = TD.getABITypeSize(GTI.getIndexedType());
370        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
371          // Constant-offset addressing.
372          Disp += CI->getSExtValue() * S;
373        } else if (IndexReg == 0 &&
374                   (!AM.GV ||
375                    !getTargetMachine()->symbolicAddressesAreRIPRel()) &&
376                   (S == 1 || S == 2 || S == 4 || S == 8)) {
377          // Scaled-index addressing.
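          // x86 addressing modes support a single index register scaled by
          // 1, 2, 4 or 8, so only one non-constant index can be folded here.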
378          Scale = S;
379          IndexReg = getRegForValue(Op);
380          if (IndexReg == 0)
381            return false;
382        } else
383          // Unsupported.
384          goto unsupported_gep;
385      }
386    }
387    // Check for displacement overflow.
388    if (!isInt32(Disp))
389      break;
390    // Ok, the GEP indices were covered by constant-offset and scaled-index
391    // addressing. Update the address state and move on to examining the base.
392    AM.IndexReg = IndexReg;
393    AM.Scale = Scale;
394    AM.Disp = (uint32_t)Disp;
395    return X86SelectAddress(U->getOperand(0), AM, isCall);
396  unsupported_gep:
397    // Ok, the GEP indices weren't all covered.
398    break;
399  }
400  }
401
402  // Handle constant address.
403  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
404    // Can't handle alternate code models yet.
405    if (TM.getCodeModel() != CodeModel::Default &&
406        TM.getCodeModel() != CodeModel::Small)
407      return false;
408
409    // RIP-relative addresses can't have additional register operands.
410    if (getTargetMachine()->symbolicAddressesAreRIPRel() &&
411        (AM.Base.Reg != 0 || AM.IndexReg != 0))
412      return false;
413
414    // Set up the basic address.
415    AM.GV = GV;
416    if (!isCall &&
417        TM.getRelocationModel() == Reloc::PIC_ &&
418        !Subtarget->is64Bit())
419      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
420
421    // Emit an extra load if the ABI requires it.
422    if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
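      // Some globals (e.g. Darwin non-lazy pointers or PIC GOT entries) are
      // reached through an indirection: the global's address must itself be
      // loaded from a stub before it can be used as a base.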
423      // Check to see if we've already materialized this
424      // value in a register in this block.
425      if (unsigned Reg = LocalValueMap[V]) {
426        AM.Base.Reg = Reg;
427        AM.GV = 0;
428        return true;
429      }
430      // Issue load from stub if necessary.
431      unsigned Opc = 0;
432      const TargetRegisterClass *RC = NULL;
433      if (TLI.getPointerTy() == MVT::i32) {
434        Opc = X86::MOV32rm;
435        RC  = X86::GR32RegisterClass;
436      } else {
437        Opc = X86::MOV64rm;
438        RC  = X86::GR64RegisterClass;
439      }
440
441      X86AddressMode StubAM;
442      StubAM.Base.Reg = AM.Base.Reg;
443      StubAM.GV = AM.GV;
444      unsigned ResultReg = createResultReg(RC);
445      addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), StubAM);
446
447      // Now construct the final address. Note that the Disp, Scale,
448      // and Index values may already be set here.
449      AM.Base.Reg = ResultReg;
450      AM.GV = 0;
451
452      // Prevent loading GV stub multiple times in same MBB.
453      LocalValueMap[V] = AM.Base.Reg;
454    }
455    return true;
456  }
457
458  // If all else fails, try to materialize the value in a register.
459  if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) {
460    if (AM.Base.Reg == 0) {
461      AM.Base.Reg = getRegForValue(V);
462      return AM.Base.Reg != 0;
463    }
464    if (AM.IndexReg == 0) {
465      assert(AM.Scale == 1 && "Scale with no index!");
466      AM.IndexReg = getRegForValue(V);
467      return AM.IndexReg != 0;
468    }
469  }
470
471  return false;
472}
473
474/// X86SelectStore - Select and emit code to implement store instructions.
475bool X86FastISel::X86SelectStore(Instruction* I) {
476  MVT VT;
477  if (!isTypeLegal(I->getOperand(0)->getType(), TLI, VT))
478    return false;
479  unsigned Val = getRegForValue(I->getOperand(0));
480  if (Val == 0)
481    // Unhandled operand. Halt "fast" selection and bail.
482    return false;
483
484  X86AddressMode AM;
485  if (!X86SelectAddress(I->getOperand(1), AM, false))
486    return false;
487
488  return X86FastEmitStore(VT, Val, AM);
489}
490
491/// X86SelectLoad - Select and emit code to implement load instructions.
492///
493bool X86FastISel::X86SelectLoad(Instruction *I)  {
494  MVT VT;
495  if (!isTypeLegal(I->getType(), TLI, VT))
496    return false;
497
498  X86AddressMode AM;
499  if (!X86SelectAddress(I->getOperand(0), AM, false))
500    return false;
501
502  unsigned ResultReg = 0;
503  if (X86FastEmitLoad(VT, AM, ResultReg)) {
504    UpdateValueMap(I, ResultReg);
505    return true;
506  }
507  return false;
508}
509
510bool X86FastISel::X86SelectCmp(Instruction *I) {
511  CmpInst *CI = cast<CmpInst>(I);
512
513  MVT VT;
514  if (!isTypeLegal(I->getOperand(0)->getType(), TLI, VT))
515    return false;
516
517  unsigned Op0Reg = getRegForValue(CI->getOperand(0));
518  if (Op0Reg == 0) return false;
519  unsigned Op1Reg = getRegForValue(CI->getOperand(1));
520  if (Op1Reg == 0) return false;
521
522  unsigned Opc;
523  switch (VT.getSimpleVT()) {
524  case MVT::i8: Opc = X86::CMP8rr; break;
525  case MVT::i16: Opc = X86::CMP16rr; break;
526  case MVT::i32: Opc = X86::CMP32rr; break;
527  case MVT::i64: Opc = X86::CMP64rr; break;
528  case MVT::f32: Opc = X86::UCOMISSrr; break;
529  case MVT::f64: Opc = X86::UCOMISDrr; break;
530  default: return false;
531  }
532
533  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
534  switch (CI->getPredicate()) {
535  case CmpInst::FCMP_OEQ: {
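    // Ordered equal: the result is true only if ZF is set (equal) and PF is
    // clear (not unordered), so combine SETE and SETNP with an AND.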
536    unsigned EReg = createResultReg(&X86::GR8RegClass);
537    unsigned NPReg = createResultReg(&X86::GR8RegClass);
538    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
539    BuildMI(MBB, TII.get(X86::SETEr), EReg);
540    BuildMI(MBB, TII.get(X86::SETNPr), NPReg);
541    BuildMI(MBB, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
542    break;
543  }
544  case CmpInst::FCMP_UNE: {
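    // Unordered not-equal: true if the operands are unequal or the compare is
    // unordered, so combine SETNE and SETP with an OR.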
545    unsigned NEReg = createResultReg(&X86::GR8RegClass);
546    unsigned PReg = createResultReg(&X86::GR8RegClass);
547    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
548    BuildMI(MBB, TII.get(X86::SETNEr), NEReg);
549    BuildMI(MBB, TII.get(X86::SETPr), PReg);
550    BuildMI(MBB, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
551    break;
552  }
553  case CmpInst::FCMP_OGT:
554    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
555    BuildMI(MBB, TII.get(X86::SETAr), ResultReg);
556    break;
557  case CmpInst::FCMP_OGE:
558    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
559    BuildMI(MBB, TII.get(X86::SETAEr), ResultReg);
560    break;
561  case CmpInst::FCMP_OLT:
562    BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
563    BuildMI(MBB, TII.get(X86::SETAr), ResultReg);
564    break;
565  case CmpInst::FCMP_OLE:
566    BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
567    BuildMI(MBB, TII.get(X86::SETAEr), ResultReg);
568    break;
569  case CmpInst::FCMP_ONE:
570    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
571    BuildMI(MBB, TII.get(X86::SETNEr), ResultReg);
572    break;
573  case CmpInst::FCMP_ORD:
574    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
575    BuildMI(MBB, TII.get(X86::SETNPr), ResultReg);
576    break;
577  case CmpInst::FCMP_UNO:
578    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
579    BuildMI(MBB, TII.get(X86::SETPr), ResultReg);
580    break;
581  case CmpInst::FCMP_UEQ:
582    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
583    BuildMI(MBB, TII.get(X86::SETEr), ResultReg);
584    break;
585  case CmpInst::FCMP_UGT:
586    BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
587    BuildMI(MBB, TII.get(X86::SETBr), ResultReg);
588    break;
589  case CmpInst::FCMP_UGE:
590    BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
591    BuildMI(MBB, TII.get(X86::SETBEr), ResultReg);
592    break;
593  case CmpInst::FCMP_ULT:
594    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
595    BuildMI(MBB, TII.get(X86::SETBr), ResultReg);
596    break;
597  case CmpInst::FCMP_ULE:
598    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
599    BuildMI(MBB, TII.get(X86::SETBEr), ResultReg);
600    break;
601  case CmpInst::ICMP_EQ:
602    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
603    BuildMI(MBB, TII.get(X86::SETEr), ResultReg);
604    break;
605  case CmpInst::ICMP_NE:
606    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
607    BuildMI(MBB, TII.get(X86::SETNEr), ResultReg);
608    break;
609  case CmpInst::ICMP_UGT:
610    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
611    BuildMI(MBB, TII.get(X86::SETAr), ResultReg);
612    break;
613  case CmpInst::ICMP_UGE:
614    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
615    BuildMI(MBB, TII.get(X86::SETAEr), ResultReg);
616    break;
617  case CmpInst::ICMP_ULT:
618    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
619    BuildMI(MBB, TII.get(X86::SETBr), ResultReg);
620    break;
621  case CmpInst::ICMP_ULE:
622    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
623    BuildMI(MBB, TII.get(X86::SETBEr), ResultReg);
624    break;
625  case CmpInst::ICMP_SGT:
626    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
627    BuildMI(MBB, TII.get(X86::SETGr), ResultReg);
628    break;
629  case CmpInst::ICMP_SGE:
630    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
631    BuildMI(MBB, TII.get(X86::SETGEr), ResultReg);
632    break;
633  case CmpInst::ICMP_SLT:
634    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
635    BuildMI(MBB, TII.get(X86::SETLr), ResultReg);
636    break;
637  case CmpInst::ICMP_SLE:
638    BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
639    BuildMI(MBB, TII.get(X86::SETLEr), ResultReg);
640    break;
641  default:
642    return false;
643  }
644
645  UpdateValueMap(I, ResultReg);
646  return true;
647}
648
649bool X86FastISel::X86SelectZExt(Instruction *I) {
650  // Special-case hack: The only i1 values we know how to produce currently
651  // set the upper bits of an i8 value to zero.
652  if (I->getType() == Type::Int8Ty &&
653      I->getOperand(0)->getType() == Type::Int1Ty) {
654    unsigned ResultReg = getRegForValue(I->getOperand(0));
655    if (ResultReg == 0) return false;
656    UpdateValueMap(I, ResultReg);
657    return true;
658  }
659
660  return false;
661}
662
663bool X86FastISel::X86SelectBranch(Instruction *I) {
664  BranchInst *BI = cast<BranchInst>(I);
665  // Unconditional branches are selected by tablegen-generated code.
666  unsigned OpReg = getRegForValue(BI->getCondition());
667  if (OpReg == 0) return false;
668  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
669  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
670
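  // Test the condition against itself to set the flags, branch to the true
  // block when it is non-zero, and otherwise jump to the false block.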
671  BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
672  BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
673  BuildMI(MBB, TII.get(X86::JMP)).addMBB(FalseMBB);
674
675  MBB->addSuccessor(TrueMBB);
676  MBB->addSuccessor(FalseMBB);
677
678  return true;
679}
680
681bool X86FastISel::X86SelectShift(Instruction *I) {
682  unsigned CReg = 0, OpReg = 0, OpImm = 0;
683  const TargetRegisterClass *RC = NULL;
684  if (I->getType() == Type::Int8Ty) {
685    CReg = X86::CL;
686    RC = &X86::GR8RegClass;
687    switch (I->getOpcode()) {
688    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
689    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
690    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
691    default: return false;
692    }
693  } else if (I->getType() == Type::Int16Ty) {
694    CReg = X86::CX;
695    RC = &X86::GR16RegClass;
696    switch (I->getOpcode()) {
697    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
698    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
699    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
700    default: return false;
701    }
702  } else if (I->getType() == Type::Int32Ty) {
703    CReg = X86::ECX;
704    RC = &X86::GR32RegClass;
705    switch (I->getOpcode()) {
706    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
707    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
708    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
709    default: return false;
710    }
711  } else if (I->getType() == Type::Int64Ty) {
712    CReg = X86::RCX;
713    RC = &X86::GR64RegClass;
714    switch (I->getOpcode()) {
715    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
716    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
717    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
718    default: return false;
719    }
720  } else {
721    return false;
722  }
723
724  MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
725  if (VT == MVT::Other || !isTypeLegal(I->getType(), TLI, VT))
726    return false;
727
728  unsigned Op0Reg = getRegForValue(I->getOperand(0));
729  if (Op0Reg == 0) return false;
730
731  // Fold constant shift amounts, e.g. shl(x, 3).
732  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
733    unsigned ResultReg = createResultReg(RC);
734    BuildMI(MBB, TII.get(OpImm),
735            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue());
736    UpdateValueMap(I, ResultReg);
737    return true;
738  }
739
740  unsigned Op1Reg = getRegForValue(I->getOperand(1));
741  if (Op1Reg == 0) return false;
742  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
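  // The variable-count shift instructions implicitly read the shift amount
  // from CL, so the copy above moves the amount into CL/CX/ECX/RCX first.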
743  unsigned ResultReg = createResultReg(RC);
744  BuildMI(MBB, TII.get(OpReg), ResultReg).addReg(Op0Reg);
745  UpdateValueMap(I, ResultReg);
746  return true;
747}
748
749bool X86FastISel::X86SelectSelect(Instruction *I) {
750  const Type *Ty = I->getType();
751  if (isa<PointerType>(Ty))
752    Ty = TD.getIntPtrType();
753
754  unsigned Opc = 0;
755  const TargetRegisterClass *RC = NULL;
756  if (Ty == Type::Int16Ty) {
757    Opc = X86::CMOVE16rr;
758    RC = &X86::GR16RegClass;
759  } else if (Ty == Type::Int32Ty) {
760    Opc = X86::CMOVE32rr;
761    RC = &X86::GR32RegClass;
762  } else if (Ty == Type::Int64Ty) {
763    Opc = X86::CMOVE64rr;
764    RC = &X86::GR64RegClass;
765  } else {
766    return false;
767  }
768
769  MVT VT = MVT::getMVT(Ty, /*HandleUnknown=*/true);
770  if (VT == MVT::Other || !isTypeLegal(Ty, TLI, VT))
771    return false;
772
773  unsigned Op0Reg = getRegForValue(I->getOperand(0));
774  if (Op0Reg == 0) return false;
775  unsigned Op1Reg = getRegForValue(I->getOperand(1));
776  if (Op1Reg == 0) return false;
777  unsigned Op2Reg = getRegForValue(I->getOperand(2));
778  if (Op2Reg == 0) return false;
779
780  BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
781  unsigned ResultReg = createResultReg(RC);
782  BuildMI(MBB, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
783  UpdateValueMap(I, ResultReg);
784  return true;
785}
786
787bool X86FastISel::X86SelectFPExt(Instruction *I) {
788  if (Subtarget->hasSSE2()) {
789    if (I->getType() == Type::DoubleTy) {
790      Value *V = I->getOperand(0);
791      if (V->getType() == Type::FloatTy) {
792        unsigned OpReg = getRegForValue(V);
793        if (OpReg == 0) return false;
794        unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
795        BuildMI(MBB, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
796        UpdateValueMap(I, ResultReg);
797        return true;
798      }
799    }
800  }
801
802  return false;
803}
804
805bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
806  if (Subtarget->hasSSE2()) {
807    if (I->getType() == Type::FloatTy) {
808      Value *V = I->getOperand(0);
809      if (V->getType() == Type::DoubleTy) {
810        unsigned OpReg = getRegForValue(V);
811        if (OpReg == 0) return false;
812        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
813        BuildMI(MBB, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
814        UpdateValueMap(I, ResultReg);
815        return true;
816      }
817    }
818  }
819
820  return false;
821}
822
823bool X86FastISel::X86SelectTrunc(Instruction *I) {
824  if (Subtarget->is64Bit())
825    // All other cases should be handled by the tblgen generated code.
826    return false;
827  MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
828  MVT DstVT = TLI.getValueType(I->getType());
829  if (DstVT != MVT::i8)
830    // All other cases should be handled by the tblgen generated code.
831    return false;
832  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
833    // All other cases should be handled by the tblgen generated code.
834    return false;
835
836  unsigned InputReg = getRegForValue(I->getOperand(0));
837  if (!InputReg)
838    // Unhandled operand.  Halt "fast" selection and bail.
839    return false;
840
841  // First issue a copy to GR16_ or GR32_.
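  // In 32-bit mode only EAX, ECX, EDX and EBX have addressable 8-bit
  // sub-registers, so restrict the value to the GR16_/GR32_ classes before
  // extracting the low byte.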
842  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16to16_ : X86::MOV32to32_;
843  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
844    ? X86::GR16_RegisterClass : X86::GR32_RegisterClass;
845  unsigned CopyReg = createResultReg(CopyRC);
846  BuildMI(MBB, TII.get(CopyOpc), CopyReg).addReg(InputReg);
847
848  // Then issue an extract_subreg.
849  unsigned ResultReg = FastEmitInst_extractsubreg(CopyReg, 1); // x86_subreg_8bit
850  if (!ResultReg)
851    return false;
852
853  UpdateValueMap(I, ResultReg);
854  return true;
855}
856
857bool X86FastISel::X86SelectCall(Instruction *I) {
858  CallInst *CI = cast<CallInst>(I);
859  Value *Callee = I->getOperand(0);
860
861  // Can't handle inline asm yet.
862  if (isa<InlineAsm>(Callee))
863    return false;
864
865  // FIXME: Handle some intrinsics.
866  if (Function *F = CI->getCalledFunction()) {
867    if (F->isDeclaration() && F->getIntrinsicID())
868      return false;
869  }
870
871  // Handle only C and fastcc calling conventions for now.
872  CallSite CS(CI);
873  unsigned CC = CS.getCallingConv();
874  if (CC != CallingConv::C &&
875      CC != CallingConv::Fast &&
876      CC != CallingConv::X86_FastCall)
877    return false;
878
879  // Let SDISel handle vararg functions.
880  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
881  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
882  if (FTy->isVarArg())
883    return false;
884
885  // Handle *simple* calls for now.
886  const Type *RetTy = CS.getType();
887  MVT RetVT;
888  if (RetTy == Type::VoidTy)
889    RetVT = MVT::isVoid;
890  else if (!isTypeLegal(RetTy, TLI, RetVT, true))
891    return false;
892
893  // Materialize callee address in a register. FIXME: GV address can be
894  // handled with a CALLpcrel32 instead.
895  X86AddressMode CalleeAM;
896  if (!X86SelectAddress(Callee, CalleeAM, true))
897    return false;
898  unsigned CalleeOp = 0;
899  GlobalValue *GV = 0;
900  if (CalleeAM.Base.Reg != 0) {
901    assert(CalleeAM.GV == 0);
902    CalleeOp = CalleeAM.Base.Reg;
903  } else if (CalleeAM.GV != 0) {
904    assert(CalleeAM.GV != 0);
905    GV = CalleeAM.GV;
906  } else
907    return false;
908
909  // Allow calls which produce i1 results.
910  bool AndToI1 = false;
911  if (RetVT == MVT::i1) {
912    RetVT = MVT::i8;
913    AndToI1 = true;
914  }
915
916  // Deal with call operands first.
917  SmallVector<unsigned, 4> Args;
918  SmallVector<MVT, 4> ArgVTs;
919  SmallVector<ISD::ArgFlagsTy, 4> ArgFlags;
920  Args.reserve(CS.arg_size());
921  ArgVTs.reserve(CS.arg_size());
922  ArgFlags.reserve(CS.arg_size());
923  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
924       i != e; ++i) {
925    unsigned Arg = getRegForValue(*i);
926    if (Arg == 0)
927      return false;
928    ISD::ArgFlagsTy Flags;
929    unsigned AttrInd = i - CS.arg_begin() + 1;
930    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
931      Flags.setSExt();
932    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
933      Flags.setZExt();
934
935    // FIXME: Only handle *easy* calls for now.
936    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
937        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
938        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
939        CS.paramHasAttr(AttrInd, Attribute::ByVal))
940      return false;
941
942    const Type *ArgTy = (*i)->getType();
943    MVT ArgVT;
944    if (!isTypeLegal(ArgTy, TLI, ArgVT))
945      return false;
946    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
947    Flags.setOrigAlign(OriginalAlignment);
948
949    Args.push_back(Arg);
950    ArgVTs.push_back(ArgVT);
951    ArgFlags.push_back(Flags);
952  }
953
954  // Analyze operands of the call, assigning locations to each operand.
955  SmallVector<CCValAssign, 16> ArgLocs;
956  CCState CCInfo(CC, false, TM, ArgLocs);
957  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
958
959  // Get a count of how many bytes are to be pushed on the stack.
960  unsigned NumBytes = CCInfo.getNextStackOffset();
961
962  // Issue CALLSEQ_START
963  BuildMI(MBB, TII.get(X86::ADJCALLSTACKDOWN)).addImm(NumBytes);
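  // ADJCALLSTACKDOWN reserves NumBytes of stack for the outgoing arguments;
  // it is paired with the ADJCALLSTACKUP emitted after the call.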
964
965  // Process arguments: walk the register/memloc assignments, inserting
966  // copies / loads.
967  SmallVector<unsigned, 4> RegArgs;
968  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
969    CCValAssign &VA = ArgLocs[i];
970    unsigned Arg = Args[VA.getValNo()];
971    MVT ArgVT = ArgVTs[VA.getValNo()];
972
973    // Promote the value if needed.
974    switch (VA.getLocInfo()) {
975    default: assert(0 && "Unknown loc info!");
976    case CCValAssign::Full: break;
977    case CCValAssign::SExt: {
978      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
979                                       Arg, ArgVT, Arg);
980      assert(Emitted && "Failed to emit a sext!");
981      ArgVT = VA.getLocVT();
982      break;
983    }
984    case CCValAssign::ZExt: {
985      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
986                                       Arg, ArgVT, Arg);
987      assert(Emitted && "Failed to emit a zext!");
988      ArgVT = VA.getLocVT();
989      break;
990    }
991    case CCValAssign::AExt: {
992      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
993                                       Arg, ArgVT, Arg);
994      if (!Emitted)
995        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
996                                         Arg, ArgVT, Arg);
997      if (!Emitted)
998        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
999                                    Arg, ArgVT, Arg);
1000
1001      assert(Emitted && "Failed to emit an aext!");
1002      ArgVT = VA.getLocVT();
1003      break;
1004    }
1005    }
1006
1007    if (VA.isRegLoc()) {
1008      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
1009      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
1010                                      Arg, RC, RC);
1011      assert(Emitted && "Failed to emit a copy instruction!");
1012      RegArgs.push_back(VA.getLocReg());
1013    } else {
1014      unsigned LocMemOffset = VA.getLocMemOffset();
1015      X86AddressMode AM;
1016      AM.Base.Reg = StackPtr;
1017      AM.Disp = LocMemOffset;
1018      X86FastEmitStore(ArgVT, Arg, AM);
1019    }
1020  }
1021
1022  // ELF / PIC requires the GOT pointer to be in the EBX register before
1023  // making function calls through the PLT.
1024  if (!Subtarget->is64Bit() &&
1025      TM.getRelocationModel() == Reloc::PIC_ &&
1026      Subtarget->isPICStyleGOT()) {
1027    TargetRegisterClass *RC = X86::GR32RegisterClass;
1028    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
1029    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
1030    assert(Emitted && "Failed to emit a copy instruction!");
1031  }
1032
1033  // Issue the call.
1034  unsigned CallOpc = CalleeOp
1035    ? (Subtarget->is64Bit() ? X86::CALL64r       : X86::CALL32r)
1036    : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
1037  MachineInstrBuilder MIB = CalleeOp
1038    ? BuildMI(MBB, TII.get(CallOpc)).addReg(CalleeOp)
1039    : BuildMI(MBB, TII.get(CallOpc)).addGlobalAddress(GV);
1040
1041  // Add an implicit use of the GOT pointer in EBX.
1042  if (!Subtarget->is64Bit() &&
1043      TM.getRelocationModel() == Reloc::PIC_ &&
1044      Subtarget->isPICStyleGOT())
1045    MIB.addReg(X86::EBX);
1046
1047  // Add implicit physical register uses to the call.
1048  while (!RegArgs.empty()) {
1049    MIB.addReg(RegArgs.back());
1050    RegArgs.pop_back();
1051  }
1052
1053  // Issue CALLSEQ_END
1054  BuildMI(MBB, TII.get(X86::ADJCALLSTACKUP)).addImm(NumBytes).addImm(0);
1055
1056  // Now handle call return value (if any).
1057  if (RetVT.getSimpleVT() != MVT::isVoid) {
1058    SmallVector<CCValAssign, 16> RVLocs;
1059    CCState CCInfo(CC, false, TM, RVLocs);
1060    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
1061
1062    // Copy all of the result registers out of their specified physreg.
1063    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
1064    MVT CopyVT = RVLocs[0].getValVT();
1065    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1066    TargetRegisterClass *SrcRC = DstRC;
1067
1068    // If this is a call to a function that returns an fp value on the x87 fp
1069    // stack, but where we prefer to use the value in xmm registers, copy it
1070    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
1071    if ((RVLocs[0].getLocReg() == X86::ST0 ||
1072         RVLocs[0].getLocReg() == X86::ST1) &&
1073        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
1074      CopyVT = MVT::f80;
1075      SrcRC = X86::RSTRegisterClass;
1076      DstRC = X86::RFP80RegisterClass;
1077    }
1078
1079    unsigned ResultReg = createResultReg(DstRC);
1080    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
1081                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
1082    assert(Emitted && "Failed to emit a copy instruction!");
1083    if (CopyVT != RVLocs[0].getValVT()) {
1084      // Round the F80 value to the right size, which also moves it to the appropriate xmm
1085      // register. This is accomplished by storing the F80 value in memory and
1086      // then loading it back. Ewww...
1087      MVT ResVT = RVLocs[0].getValVT();
1088      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
1089      unsigned MemSize = ResVT.getSizeInBits()/8;
1090      int FI = MFI.CreateStackObject(MemSize, MemSize);
1091      addFrameReference(BuildMI(MBB, TII.get(Opc)), FI).addReg(ResultReg);
1092      DstRC = ResVT == MVT::f32
1093        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
1094      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
1095      ResultReg = createResultReg(DstRC);
1096      addFrameReference(BuildMI(MBB, TII.get(Opc), ResultReg), FI);
1097    }
1098
1099    if (AndToI1) {
1100      // Mask out all but lowest bit for some call which produces an i1.
1101      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
1102      BuildMI(MBB, TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
1103      ResultReg = AndResult;
1104    }
1105
1106    UpdateValueMap(I, ResultReg);
1107  }
1108
1109  return true;
1110}
1111
1112
1113bool
1114X86FastISel::TargetSelectInstruction(Instruction *I)  {
1115  switch (I->getOpcode()) {
1116  default: break;
1117  case Instruction::Load:
1118    return X86SelectLoad(I);
1119  case Instruction::Store:
1120    return X86SelectStore(I);
1121  case Instruction::ICmp:
1122  case Instruction::FCmp:
1123    return X86SelectCmp(I);
1124  case Instruction::ZExt:
1125    return X86SelectZExt(I);
1126  case Instruction::Br:
1127    return X86SelectBranch(I);
1128  case Instruction::Call:
1129    return X86SelectCall(I);
1130  case Instruction::LShr:
1131  case Instruction::AShr:
1132  case Instruction::Shl:
1133    return X86SelectShift(I);
1134  case Instruction::Select:
1135    return X86SelectSelect(I);
1136  case Instruction::Trunc:
1137    return X86SelectTrunc(I);
1138  case Instruction::FPExt:
1139    return X86SelectFPExt(I);
1140  case Instruction::FPTrunc:
1141    return X86SelectFPTrunc(I);
1142  }
1143
1144  return false;
1145}
1146
1147unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
1148  MVT VT;
1149  if (!isTypeLegal(C->getType(), TLI, VT))
1150    return 0;
1151
1152  // Get opcode and regclass of the output for the given load instruction.
1153  unsigned Opc = 0;
1154  const TargetRegisterClass *RC = NULL;
1155  switch (VT.getSimpleVT()) {
1156  default: return 0;
1157  case MVT::i8:
1158    Opc = X86::MOV8rm;
1159    RC  = X86::GR8RegisterClass;
1160    break;
1161  case MVT::i16:
1162    Opc = X86::MOV16rm;
1163    RC  = X86::GR16RegisterClass;
1164    break;
1165  case MVT::i32:
1166    Opc = X86::MOV32rm;
1167    RC  = X86::GR32RegisterClass;
1168    break;
1169  case MVT::i64:
1170    // Must be in x86-64 mode.
1171    Opc = X86::MOV64rm;
1172    RC  = X86::GR64RegisterClass;
1173    break;
1174  case MVT::f32:
1175    if (Subtarget->hasSSE1()) {
1176      Opc = X86::MOVSSrm;
1177      RC  = X86::FR32RegisterClass;
1178    } else {
1179      Opc = X86::LD_Fp32m;
1180      RC  = X86::RFP32RegisterClass;
1181    }
1182    break;
1183  case MVT::f64:
1184    if (Subtarget->hasSSE2()) {
1185      Opc = X86::MOVSDrm;
1186      RC  = X86::FR64RegisterClass;
1187    } else {
1188      Opc = X86::LD_Fp64m;
1189      RC  = X86::RFP64RegisterClass;
1190    }
1191    break;
1192  case MVT::f80:
1193    // No f80 support yet.
1194    return false;
1195  }
1196
1197  // Materialize addresses with LEA instructions.
1198  if (isa<GlobalValue>(C)) {
1199    X86AddressMode AM;
1200    if (X86SelectAddress(C, AM, false)) {
1201      if (TLI.getPointerTy() == MVT::i32)
1202        Opc = X86::LEA32r;
1203      else
1204        Opc = X86::LEA64r;
1205      unsigned ResultReg = createResultReg(RC);
1206      addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
1207      return ResultReg;
1208    }
1209    return 0;
1210  }
1211
1212  // MachineConstantPool wants an explicit alignment.
1213  unsigned Align = TD.getPreferredTypeAlignmentShift(C->getType());
1214  if (Align == 0) {
1215    // Alignment of vector types.  FIXME!
1216    Align = TD.getABITypeSize(C->getType());
1217    Align = Log2_64(Align);
1218  }
1219
1220  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
1221  unsigned ResultReg = createResultReg(RC);
1222  addConstantPoolReference(BuildMI(MBB, TII.get(Opc), ResultReg), MCPOffset);
1223  return ResultReg;
1224}
1225
1226unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
1227  X86AddressMode AM;
1228  if (!X86SelectAddress(C, AM, false))
1229    return 0;
1230  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
1231  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
1232  unsigned ResultReg = createResultReg(RC);
1233  addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
1234  return ResultReg;
1235}
1236
1237namespace llvm {
1238  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
1239                        MachineModuleInfo *mmi,
1240                        DenseMap<const Value *, unsigned> &vm,
1241                        DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
1242                        DenseMap<const AllocaInst *, int> &am) {
1243    return new X86FastISel(mf, mmi, vm, bm, am);
1244  }
1245}
1246