X86FastISel.cpp revision d1474d09cbe5fdeec8ba0d6c6b52f316f3422532
1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the X86-specific support for the FastISel class. Much
11// of the target-specific code is generated by tablegen in the file
12// X86GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86RegisterInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/CallingConv.h"
23#include "llvm/DerivedTypes.h"
24#include "llvm/GlobalVariable.h"
25#include "llvm/Instructions.h"
26#include "llvm/IntrinsicInst.h"
27#include "llvm/CodeGen/FastISel.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Support/CallSite.h"
32#include "llvm/Support/GetElementPtrTypeIterator.h"
33#include "llvm/Target/TargetOptions.h"
34using namespace llvm;
35
36namespace {
37
/// X86FastISel - X86-specific FastISel implementation.  Target-independent
/// selection is attempted via the tablegen-generated routines included from
/// X86GenFastISel.inc; the X86Select* methods below handle the cases that
/// need target knowledge.
class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  /// Constructor - forwards the FastISel bookkeeping maps to the base class
  /// and caches the subtarget facts (stack pointer register, SSE level) that
  /// are consulted throughout selection.
  explicit X86FastISel(MachineFunction &mf,
                       MachineModuleInfo *mmi,
                       DwarfWriter *dw,
                       DenseMap<const Value *, unsigned> &vm,
                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                       DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                       , SmallSet<Instruction*, 8> &cil
#endif
                       )
    : FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
               , cil
#endif
               ) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  /// TargetSelectInstruction - Entry point from the FastISel framework;
  /// dispatches to the X86Select* handlers below.
  virtual bool TargetSelectInstruction(Instruction *I);

// Tablegen-generated FastEmit_* helpers (declared inside the class body).
#include "X86GenFastISel.inc"

private:
  // Low-level emission helpers: emit a single compare/load/store/extend for
  // an already-legalized simple value type.
  bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);

  bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(MVT VT, Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(MVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
                         unsigned &ResultReg);

  // Address computation: fold V into an X86 addressing mode if possible.
  bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);

  // Per-instruction selection routines, one per handled IR opcode.
  bool X86SelectLoad(Instruction *I);

  bool X86SelectStore(Instruction *I);

  bool X86SelectCmp(Instruction *I);

  bool X86SelectZExt(Instruction *I);

  bool X86SelectBranch(Instruction *I);

  bool X86SelectShift(Instruction *I);

  bool X86SelectSelect(Instruction *I);

  bool X86SelectTrunc(Instruction *I);

  bool X86SelectFPExt(Instruction *I);
  bool X86SelectFPTrunc(Instruction *I);

  bool X86SelectExtractValue(Instruction *I);

  bool X86VisitIntrinsicCall(IntrinsicInst &I);
  bool X86SelectCall(Instruction *I);

  CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(Constant *C);

  unsigned TargetMaterializeAlloca(AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(MVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};
141
142} // end anonymous namespace.
143
144bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
145  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
146  if (VT == MVT::Other || !VT.isSimple())
147    // Unhandled type. Halt "fast" selection and bail.
148    return false;
149
150  // For now, require SSE/SSE2 for performing floating-point operations,
151  // since x87 requires additional work.
152  if (VT == MVT::f64 && !X86ScalarSSEf64)
153     return false;
154  if (VT == MVT::f32 && !X86ScalarSSEf32)
155     return false;
156  // Similarly, no f80 support yet.
157  if (VT == MVT::f80)
158    return false;
159  // We only handle legal types. For example, on x86-32 the instruction
160  // selector contains all of the 64-bit instructions from x86-64,
161  // under the assumption that i64 won't be used if the target doesn't
162  // support it.
163  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
164}
165
166#include "X86GenCallingConv.inc"
167
168/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
169/// convention.
170CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
171  if (Subtarget->is64Bit()) {
172    if (Subtarget->isTargetWin64())
173      return CC_X86_Win64_C;
174    else
175      return CC_X86_64_C;
176  }
177
178  if (CC == CallingConv::X86_FastCall)
179    return CC_X86_32_FastCall;
180  else if (CC == CallingConv::Fast)
181    return CC_X86_32_FastCC;
182  else
183    return CC_X86_32_C;
184}
185
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    // Prefer an SSE register when SSE1 is available; otherwise fall back
    // to an x87 stack load.
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    // Likewise: SSE2 register load, or x87 fallback.
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  // Create a fresh virtual register of the chosen class and emit the load
  // from the composed addressing mode into it.
  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return true;
}
240
241/// X86FastEmitStore - Emit a machine instruction to store a value Val of
242/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
243/// and a displacement offset, or a GlobalAddress,
244/// i.e. V. Return true if it is possible.
245bool
246X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
247                              const X86AddressMode &AM) {
248  // Get opcode and regclass of the output for the given store instruction.
249  unsigned Opc = 0;
250  switch (VT.getSimpleVT()) {
251  case MVT::f80: // No f80 support yet.
252  default: return false;
253  case MVT::i8:  Opc = X86::MOV8mr;  break;
254  case MVT::i16: Opc = X86::MOV16mr; break;
255  case MVT::i32: Opc = X86::MOV32mr; break;
256  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
257  case MVT::f32:
258    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
259    break;
260  case MVT::f64:
261    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
262    break;
263  }
264
265  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
266  return true;
267}
268
/// X86FastEmitStore - Store the IR value Val of type VT to the location
/// described by AM.  Small integer constants are folded directly into the
/// store as immediates; otherwise the value is materialized into a register
/// first and the register form is used.
bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType());

  // If this is a store of a simple constant, fold the constant into the store.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    switch (VT.getSimpleVT()) {
    default: break;
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.  The narrowing cast plus
      // comparison checks that the value round-trips through int32.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
                             .addImm(CI->getSExtValue());
      return true;
    }
  }

  // Not a foldable constant: materialize the value into a register and
  // emit a register store.
  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}
303
304/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
305/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
306/// ISD::SIGN_EXTEND).
307bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
308                                    unsigned Src, MVT SrcVT,
309                                    unsigned &ResultReg) {
310  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
311
312  if (RR != 0) {
313    ResultReg = RR;
314    return true;
315  } else
316    return false;
317}
318
/// X86SelectAddress - Attempt to fill in an address from the given value.
/// Looks through bitcasts and no-op int<->ptr casts, folds static allocas,
/// constant adds, and simple GEPs into AM, and handles global addresses
/// (including PIC/stub loads).  Returns false if V cannot be expressed as
/// an X86 addressing mode.
bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
  User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    // Constant expressions share opcodes with instructions, so the same
    // switch below handles both.
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM, isCall);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::Alloca: {
    // Address folding below is only done for loads/stores, not call targets.
    if (isCall) break;
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
    if (SI != StaticAllocaMap.end()) {
      // Static allocas are addressed via their frame index.
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    if (isCall) break;
    // Adds of constants are common and easy enough.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt32(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM, isCall);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    if (isCall) break;
    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        // Struct indices are always constant; add the field offset.
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
        } else if (IndexReg == 0 &&
                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                   (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op);
          if (IndexReg == 0)
            return false;
        } else
          // Unsupported.
          goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt32(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    return X86SelectAddress(U->getOperand(0), AM, isCall);
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Default &&
        TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    if (!isCall &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        !Subtarget->is64Bit()) {
      // 32-bit PIC references are relative to the PIC base register.
      // FIXME: How do we know Base.Reg is free??
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
    }

    // If the ABI doesn't require an extra load, return a direct reference to
    // the global.
    if (!Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
      if (Subtarget->isPICStyleRIPRel()) {
        // Use rip-relative addressing if we can.  Above we verified that the
        // base and index registers are unused.
        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
        AM.Base.Reg = X86::RIP;
      } else if (Subtarget->isPICStyleStub() &&
                 TM.getRelocationModel() == Reloc::PIC_) {
        AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
      } else if (Subtarget->isPICStyleGOT()) {
        AM.GVOpFlags = X86II::MO_GOTOFF;
      }

      return true;
    }

    // Check to see if we've already materialized this stub loaded value into a
    // register in this block.  If so, just reuse it.
    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    unsigned LoadReg;
    if (I != LocalValueMap.end() && I->second != 0) {
      LoadReg = I->second;
    } else {
      // Issue load from stub.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = GV;

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;

        if (Subtarget->isPICStyleRIPRel()) {
          StubAM.GVOpFlags = X86II::MO_GOTPCREL;
          StubAM.Base.Reg = X86::RIP;
        }

      } else {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;

        if (Subtarget->isPICStyleGOT())
          StubAM.GVOpFlags = X86II::MO_GOT;
        else if (Subtarget->isPICStyleStub()) {
          // In darwin, we have multiple different stub types, and we have both
          // PIC and -mdynamic-no-pic.  Determine whether we have a stub
          // reference and/or whether the reference is relative to the PIC base
          // or not.
          bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;

          if (!GV->hasHiddenVisibility()) {
            // Non-hidden $non_lazy_ptr reference.
            StubAM.GVOpFlags = IsPIC ? X86II::MO_DARWIN_NONLAZY_PIC_BASE :
                                       X86II::MO_DARWIN_NONLAZY;
          } else {
            // Hidden $non_lazy_ptr reference.
            StubAM.GVOpFlags = IsPIC ? X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
                                       X86II::MO_DARWIN_HIDDEN_NONLAZY;
          }
        }
      }

      LoadReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = LoadReg;
    }

    // Now construct the final address. Note that the Disp, Scale,
    // and Index values may already be set here.
    AM.Base.Reg = LoadReg;
    AM.GV = 0;
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    // Prefer the base register slot, then the index slot if base is taken.
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
548
549/// X86SelectStore - Select and emit code to implement store instructions.
550bool X86FastISel::X86SelectStore(Instruction* I) {
551  MVT VT;
552  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
553    return false;
554
555  X86AddressMode AM;
556  if (!X86SelectAddress(I->getOperand(1), AM, false))
557    return false;
558
559  return X86FastEmitStore(VT, I->getOperand(0), AM);
560}
561
562/// X86SelectLoad - Select and emit code to implement load instructions.
563///
564bool X86FastISel::X86SelectLoad(Instruction *I)  {
565  MVT VT;
566  if (!isTypeLegal(I->getType(), VT))
567    return false;
568
569  X86AddressMode AM;
570  if (!X86SelectAddress(I->getOperand(0), AM, false))
571    return false;
572
573  unsigned ResultReg = 0;
574  if (X86FastEmitLoad(VT, AM, ResultReg)) {
575    UpdateValueMap(I, ResultReg);
576    return true;
577  }
578  return false;
579}
580
581static unsigned X86ChooseCmpOpcode(MVT VT) {
582  switch (VT.getSimpleVT()) {
583  default:       return 0;
584  case MVT::i8:  return X86::CMP8rr;
585  case MVT::i16: return X86::CMP16rr;
586  case MVT::i32: return X86::CMP32rr;
587  case MVT::i64: return X86::CMP64rr;
588  case MVT::f32: return X86::UCOMISSrr;
589  case MVT::f64: return X86::UCOMISDrr;
590  }
591}
592
593/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
594/// of the comparison, return an opcode that works for the compare (e.g.
595/// CMP32ri) otherwise return 0.
596static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
597  switch (VT.getSimpleVT()) {
598  // Otherwise, we can't fold the immediate into this comparison.
599  default: return 0;
600  case MVT::i8: return X86::CMP8ri;
601  case MVT::i16: return X86::CMP16ri;
602  case MVT::i32: return X86::CMP32ri;
603  case MVT::i64:
604    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
605    // field.
606    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
607      return X86::CMP64ri32;
608    return 0;
609  }
610}
611
/// X86FastEmitCompare - Emit a compare of Op0 against Op1 of type VT,
/// setting EFLAGS.  A constant RHS is folded into a CMPri form when the
/// immediate fits; otherwise both operands are materialized and a
/// CMPrr/UCOMIS is emitted.  Note: Op0 is materialized before the RHS is
/// inspected, so any code needed to compute Op0 is emitted first.
bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType());

  // We have two options: compare with register or immediate.  If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
                                          .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  // Register-register compare: materialize the RHS too.
  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);

  return true;
}
640
/// X86SelectCmp - Select icmp/fcmp instructions: emit a compare followed by
/// a SETcc into an i8 result register.  FCMP_OEQ and FCMP_UNE need two
/// setcc results combined with AND/OR; every other predicate maps to a
/// single SETcc opcode (possibly with swapped operands).
bool X86FastISel::X86SelectCmp(Instruction *I) {
  CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    // OEQ = equal and ordered: SETE && SETNP on the UCOMIS flags.
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
    BuildMI(MBB, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    // UNE = not-equal or unordered: SETNE || SETP on the UCOMIS flags.
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  // FP predicates use the unsigned condition codes (A/B/E/NE/P) because
  // UCOMIS sets CF/ZF/PF; some require the operands swapped.
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}
716
717bool X86FastISel::X86SelectZExt(Instruction *I) {
718  // Handle zero-extension from i1 to i8, which is common.
719  if (I->getType() == Type::Int8Ty &&
720      I->getOperand(0)->getType() == Type::Int1Ty) {
721    unsigned ResultReg = getRegForValue(I->getOperand(0));
722    if (ResultReg == 0) return false;
723    // Set the high bits to zero.
724    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
725    if (ResultReg == 0) return false;
726    UpdateValueMap(I, ResultReg);
727    return true;
728  }
729
730  return false;
731}
732
733
/// X86SelectBranch - Select conditional branches.  Three strategies, in
/// order: fold a single-use compare into a Jcc; recognize the
/// extractvalue-of-overflow-intrinsic pattern and branch on the existing
/// SETO/SETB flags; otherwise fall back to TEST8rr + JNE on the condition
/// register.
bool X86FastISel::X86SelectBranch(Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    // Only fold if the compare has no other users; otherwise it still needs
    // its own i8 result.
    if (CI->hasOneUse()) {
      MVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        // OEQ has no single Jcc; invert to UNE and swap the destinations.
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
      default:
        return false;
      }

      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
        BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB);
      MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (ExtractValueInst *EI =
             dyn_cast<ExtractValueInst>(BI->getCondition())) {
    // Check to see if the branch instruction is from an "arithmetic with
    // overflow" intrinsic. The main way these intrinsics are used is:
    //
    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
    //   %sum = extractvalue { i32, i1 } %t, 0
    //   %obit = extractvalue { i32, i1 } %t, 1
    //   br i1 %obit, label %overflow, label %normal
    //
    // The %sum and %obit are converted in an ADD and a SETO/SETB before
    // reaching the branch. Therefore, we search backwards through the MBB
    // looking for the SETO/SETB instruction. If an instruction modifies the
    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
    // convert the branch into a JO/JB instruction.
    if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
        const MachineInstr *SetMI = 0;
        unsigned Reg = lookUpRegForValue(EI);

        for (MachineBasicBlock::const_reverse_iterator
               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
          const MachineInstr &MI = *RI;

          if (MI.modifiesRegister(Reg)) {
            unsigned Src, Dst, SrcSR, DstSR;

            // Look through register copies to the instruction that really
            // produced the value.
            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
              Reg = Src;
              continue;
            }

            SetMI = &MI;
            break;
          }

          // Stop the scan if anything in between could clobber EFLAGS.
          const TargetInstrDesc &TID = MI.getDesc();
          if (TID.hasUnmodeledSideEffects() ||
              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
            break;
        }

        if (SetMI) {
          unsigned OpCode = SetMI->getOpcode();

          // SETO came from sadd.with.overflow, SETB from uadd.with.overflow;
          // branch directly on the corresponding condition.
          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
              .addMBB(TrueMBB);
            FastEmitBranch(FalseMBB);
            MBB->addSuccessor(TrueMBB);
            return true;
          }
        }
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
  BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
  FastEmitBranch(FalseMBB);
  MBB->addSuccessor(TrueMBB);
  return true;
}
877
878bool X86FastISel::X86SelectShift(Instruction *I) {
879  unsigned CReg = 0, OpReg = 0, OpImm = 0;
880  const TargetRegisterClass *RC = NULL;
881  if (I->getType() == Type::Int8Ty) {
882    CReg = X86::CL;
883    RC = &X86::GR8RegClass;
884    switch (I->getOpcode()) {
885    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
886    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
887    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
888    default: return false;
889    }
890  } else if (I->getType() == Type::Int16Ty) {
891    CReg = X86::CX;
892    RC = &X86::GR16RegClass;
893    switch (I->getOpcode()) {
894    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
895    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
896    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
897    default: return false;
898    }
899  } else if (I->getType() == Type::Int32Ty) {
900    CReg = X86::ECX;
901    RC = &X86::GR32RegClass;
902    switch (I->getOpcode()) {
903    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
904    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
905    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
906    default: return false;
907    }
908  } else if (I->getType() == Type::Int64Ty) {
909    CReg = X86::RCX;
910    RC = &X86::GR64RegClass;
911    switch (I->getOpcode()) {
912    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
913    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
914    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
915    default: return false;
916    }
917  } else {
918    return false;
919  }
920
921  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
922  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
923    return false;
924
925  unsigned Op0Reg = getRegForValue(I->getOperand(0));
926  if (Op0Reg == 0) return false;
927
928  // Fold immediate in shl(x,3).
929  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
930    unsigned ResultReg = createResultReg(RC);
931    BuildMI(MBB, DL, TII.get(OpImm),
932            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
933    UpdateValueMap(I, ResultReg);
934    return true;
935  }
936
937  unsigned Op1Reg = getRegForValue(I->getOperand(1));
938  if (Op1Reg == 0) return false;
939  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
940
941  // The shift instruction uses X86::CL. If we defined a super-register
942  // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
943  // we're doing here.
944  if (CReg != X86::CL)
945    BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL)
946      .addReg(CReg).addImm(X86::SUBREG_8BIT);
947
948  unsigned ResultReg = createResultReg(RC);
949  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
950  UpdateValueMap(I, ResultReg);
951  return true;
952}
953
954bool X86FastISel::X86SelectSelect(Instruction *I) {
955  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
956  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
957    return false;
958
959  unsigned Opc = 0;
960  const TargetRegisterClass *RC = NULL;
961  if (VT.getSimpleVT() == MVT::i16) {
962    Opc = X86::CMOVE16rr;
963    RC = &X86::GR16RegClass;
964  } else if (VT.getSimpleVT() == MVT::i32) {
965    Opc = X86::CMOVE32rr;
966    RC = &X86::GR32RegClass;
967  } else if (VT.getSimpleVT() == MVT::i64) {
968    Opc = X86::CMOVE64rr;
969    RC = &X86::GR64RegClass;
970  } else {
971    return false;
972  }
973
974  unsigned Op0Reg = getRegForValue(I->getOperand(0));
975  if (Op0Reg == 0) return false;
976  unsigned Op1Reg = getRegForValue(I->getOperand(1));
977  if (Op1Reg == 0) return false;
978  unsigned Op2Reg = getRegForValue(I->getOperand(2));
979  if (Op2Reg == 0) return false;
980
981  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
982  unsigned ResultReg = createResultReg(RC);
983  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
984  UpdateValueMap(I, ResultReg);
985  return true;
986}
987
988bool X86FastISel::X86SelectFPExt(Instruction *I) {
989  // fpext from float to double.
990  if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
991    Value *V = I->getOperand(0);
992    if (V->getType() == Type::FloatTy) {
993      unsigned OpReg = getRegForValue(V);
994      if (OpReg == 0) return false;
995      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
996      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
997      UpdateValueMap(I, ResultReg);
998      return true;
999    }
1000  }
1001
1002  return false;
1003}
1004
1005bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
1006  if (Subtarget->hasSSE2()) {
1007    if (I->getType() == Type::FloatTy) {
1008      Value *V = I->getOperand(0);
1009      if (V->getType() == Type::DoubleTy) {
1010        unsigned OpReg = getRegForValue(V);
1011        if (OpReg == 0) return false;
1012        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
1013        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
1014        UpdateValueMap(I, ResultReg);
1015        return true;
1016      }
1017    }
1018  }
1019
1020  return false;
1021}
1022
1023bool X86FastISel::X86SelectTrunc(Instruction *I) {
1024  if (Subtarget->is64Bit())
1025    // All other cases should be handled by the tblgen generated code.
1026    return false;
1027  MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1028  MVT DstVT = TLI.getValueType(I->getType());
1029
1030  // This code only handles truncation to byte right now.
1031  if (DstVT != MVT::i8 && DstVT != MVT::i1)
1032    // All other cases should be handled by the tblgen generated code.
1033    return false;
1034  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
1035    // All other cases should be handled by the tblgen generated code.
1036    return false;
1037
1038  unsigned InputReg = getRegForValue(I->getOperand(0));
1039  if (!InputReg)
1040    // Unhandled operand.  Halt "fast" selection and bail.
1041    return false;
1042
1043  // First issue a copy to GR16_ABCD or GR32_ABCD.
1044  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
1045  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
1046    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
1047  unsigned CopyReg = createResultReg(CopyRC);
1048  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
1049
1050  // Then issue an extract_subreg.
1051  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
1052                                                  CopyReg, X86::SUBREG_8BIT);
1053  if (!ResultReg)
1054    return false;
1055
1056  UpdateValueMap(I, ResultReg);
1057  return true;
1058}
1059
1060bool X86FastISel::X86SelectExtractValue(Instruction *I) {
1061  ExtractValueInst *EI = cast<ExtractValueInst>(I);
1062  Value *Agg = EI->getAggregateOperand();
1063
1064  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
1065    switch (CI->getIntrinsicID()) {
1066    default: break;
1067    case Intrinsic::sadd_with_overflow:
1068    case Intrinsic::uadd_with_overflow:
1069      // Cheat a little. We know that the registers for "add" and "seto" are
1070      // allocated sequentially. However, we only keep track of the register
1071      // for "add" in the value map. Use extractvalue's index to get the
1072      // correct register for "seto".
1073      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
1074      return true;
1075    }
1076  }
1077
1078  return false;
1079}
1080
/// X86VisitIntrinsicCall - Select code for an intrinsic call. Currently only
/// the signed/unsigned add-with-overflow intrinsics are handled; anything
/// else returns false so selection falls back on the default path.
bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow: {
    // Replace "add with overflow" intrinsics with an "add" instruction followed
    // by a seto/setc instruction. Later on, when the "extractvalue"
    // instructions are encountered, we use the fact that two registers were
    // created sequentially to get the correct registers for the "sum" and the
    // "overflow bit".
    const Function *Callee = I.getCalledFunction();
    // The intrinsic returns { iN, i1 }; element 0 is the sum's type.
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    Value *Op1 = I.getOperand(1);
    Value *Op2 = I.getOperand(2);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    // Only 32- and 64-bit adds are handled here.
    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);

    // If the add with overflow is an intra-block value then we just want to
    // create temporaries for it like normal.  If it is a cross-block value then
    // UpdateValueMap will return the cross-block register used.  Since we
    // *really* want the value to be live in the register pair known by
    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
    // the cross block case.  In the non-cross-block case, we should just make
    // another register for the value.
    if (DestReg1 != ResultReg)
      ResultReg = DestReg1+1;
    else
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));

    // SETO captures signed overflow (OF); SETB captures unsigned carry (CF).
    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
    return true;
  }
  }
}
1141
/// X86SelectCall - Select and emit code for a call instruction. Returns
/// false (deferring to the default selector) for anything it cannot
/// handle: inline asm, varargs, unusual calling conventions, sret/byval/
/// nest/inreg arguments, and illegal argument or return types.
bool X86FastISel::X86SelectCall(Instruction *I) {
  CallInst *CI = cast<CallInst>(I);
  Value *Callee = I->getOperand(0);

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // Handle intrinsic calls.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
    return X86VisitIntrinsicCall(*II);

  // Handle only C and fastcc calling conventions for now.
  CallSite CS(CI);
  unsigned CC = CS.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
  // handle this for now.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = CS.getType();
  MVT RetVT;
  if (RetTy == Type::VoidTy)
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT, true))
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectAddress(Callee, CalleeAM, true))
    return false;
  unsigned CalleeOp = 0;
  GlobalValue *GV = 0;
  // Exactly one of GV (direct call) or CalleeOp (register-indirect call)
  // ends up set; anything else is rejected.
  if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Allow calls which produce i1 results.
  bool AndToI1 = false;
  if (RetVT == MVT::i1) {
    RetVT = MVT::i8;
    AndToI1 = true;
  }

  // Deal with call operands first: collect each argument's register, its
  // Value, its MVT, and its attribute-derived flags in parallel vectors.
  SmallVector<Value*, 8> ArgVals;
  SmallVector<unsigned, 8> Args;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVals.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    // Attribute indices are 1-based (0 is the return value).
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Arg);
    ArgVals.push_back(*i);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);

  // Process argument: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      // Self-assignment silences "unused variable" warnings in -Asserts builds.
      assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      // Any-extend: try ANY_EXTEND first, then fall back on ZERO_EXTEND
      // and SIGN_EXTEND (any of them is acceptable for AExt).
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
                                      Arg, RC, RC);
      assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
      Emitted = true;
      RegArgs.push_back(VA.getLocReg());
    } else {
      // Stack argument: store it at its offset from the stack pointer.
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      Value *ArgVal = ArgVals[VA.getValNo()];

      // If this is a really simple value, emit this with the Value* version of
      // X86FastEmitStore.  If it isn't simple, we don't want to do this, as it
      // can cause us to reevaluate the argument.
      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
        X86FastEmitStore(ArgVT, ArgVal, AM);
      else
        X86FastEmitStore(ArgVT, Arg, AM);
    }
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (Subtarget->isPICStyleGOT()) {
    TargetRegisterClass *RC = X86::GR32RegisterClass;
    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
    Emitted = true;
  }

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);

  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols most go through the PLT in PIC mode.  If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
    if (Subtarget->isTargetELF() &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      OpFlags = X86II::MO_PLT;
    } else if (Subtarget->isPICStyleStub() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               Subtarget->getDarwinVers() < 9) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }


    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
  }

  // Add an implicit use GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);

  // Now handle call return value (if any).
  if (RetVT.getSimpleVT() != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    MVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
    TargetRegisterClass *SrcRC = DstRC;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      SrcRC = X86::RSTRegisterClass;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
    Emitted = true;
    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 the right size, which also moves to the appropriate xmm
      // register. This is accomplished by storing the F80 value in memory and
      // then loading it back. Ewww...
      MVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but lowest bit for some call which produces an i1.
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(MBB, DL,
              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  return true;
}
1439
1440
1441bool
1442X86FastISel::TargetSelectInstruction(Instruction *I)  {
1443  switch (I->getOpcode()) {
1444  default: break;
1445  case Instruction::Load:
1446    return X86SelectLoad(I);
1447  case Instruction::Store:
1448    return X86SelectStore(I);
1449  case Instruction::ICmp:
1450  case Instruction::FCmp:
1451    return X86SelectCmp(I);
1452  case Instruction::ZExt:
1453    return X86SelectZExt(I);
1454  case Instruction::Br:
1455    return X86SelectBranch(I);
1456  case Instruction::Call:
1457    return X86SelectCall(I);
1458  case Instruction::LShr:
1459  case Instruction::AShr:
1460  case Instruction::Shl:
1461    return X86SelectShift(I);
1462  case Instruction::Select:
1463    return X86SelectSelect(I);
1464  case Instruction::Trunc:
1465    return X86SelectTrunc(I);
1466  case Instruction::FPExt:
1467    return X86SelectFPExt(I);
1468  case Instruction::FPTrunc:
1469    return X86SelectFPTrunc(I);
1470  case Instruction::ExtractValue:
1471    return X86SelectExtractValue(I);
1472  case Instruction::IntToPtr: // Deliberate fall-through.
1473  case Instruction::PtrToInt: {
1474    MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1475    MVT DstVT = TLI.getValueType(I->getType());
1476    if (DstVT.bitsGT(SrcVT))
1477      return X86SelectZExt(I);
1478    if (DstVT.bitsLT(SrcVT))
1479      return X86SelectTrunc(I);
1480    unsigned Reg = getRegForValue(I->getOperand(0));
1481    if (Reg == 0) return false;
1482    UpdateValueMap(I, Reg);
1483    return true;
1484  }
1485  }
1486
1487  return false;
1488}
1489
1490unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
1491  MVT VT;
1492  if (!isTypeLegal(C->getType(), VT))
1493    return false;
1494
1495  // Get opcode and regclass of the output for the given load instruction.
1496  unsigned Opc = 0;
1497  const TargetRegisterClass *RC = NULL;
1498  switch (VT.getSimpleVT()) {
1499  default: return false;
1500  case MVT::i8:
1501    Opc = X86::MOV8rm;
1502    RC  = X86::GR8RegisterClass;
1503    break;
1504  case MVT::i16:
1505    Opc = X86::MOV16rm;
1506    RC  = X86::GR16RegisterClass;
1507    break;
1508  case MVT::i32:
1509    Opc = X86::MOV32rm;
1510    RC  = X86::GR32RegisterClass;
1511    break;
1512  case MVT::i64:
1513    // Must be in x86-64 mode.
1514    Opc = X86::MOV64rm;
1515    RC  = X86::GR64RegisterClass;
1516    break;
1517  case MVT::f32:
1518    if (Subtarget->hasSSE1()) {
1519      Opc = X86::MOVSSrm;
1520      RC  = X86::FR32RegisterClass;
1521    } else {
1522      Opc = X86::LD_Fp32m;
1523      RC  = X86::RFP32RegisterClass;
1524    }
1525    break;
1526  case MVT::f64:
1527    if (Subtarget->hasSSE2()) {
1528      Opc = X86::MOVSDrm;
1529      RC  = X86::FR64RegisterClass;
1530    } else {
1531      Opc = X86::LD_Fp64m;
1532      RC  = X86::RFP64RegisterClass;
1533    }
1534    break;
1535  case MVT::f80:
1536    // No f80 support yet.
1537    return false;
1538  }
1539
1540  // Materialize addresses with LEA instructions.
1541  if (isa<GlobalValue>(C)) {
1542    X86AddressMode AM;
1543    if (X86SelectAddress(C, AM, false)) {
1544      if (TLI.getPointerTy() == MVT::i32)
1545        Opc = X86::LEA32r;
1546      else
1547        Opc = X86::LEA64r;
1548      unsigned ResultReg = createResultReg(RC);
1549      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1550      return ResultReg;
1551    }
1552    return 0;
1553  }
1554
1555  // MachineConstantPool wants an explicit alignment.
1556  unsigned Align = TD.getPrefTypeAlignment(C->getType());
1557  if (Align == 0) {
1558    // Alignment of vector types.  FIXME!
1559    Align = TD.getTypeAllocSize(C->getType());
1560  }
1561
1562  // x86-32 PIC requires a PIC base register for constant pools.
1563  unsigned PICBase = 0;
1564  unsigned char OpFlag = 0;
1565  if (Subtarget->isPICStyleStub() &&
1566      TM.getRelocationModel() == Reloc::PIC_) { // Not dynamic-no-pic
1567    OpFlag = X86II::MO_PIC_BASE_OFFSET;
1568    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
1569  } else if (Subtarget->isPICStyleGOT()) {
1570    OpFlag = X86II::MO_GOTOFF;
1571    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
1572  } else if (Subtarget->isPICStyleRIPRel() &&
1573             TM.getCodeModel() == CodeModel::Small) {
1574    PICBase = X86::RIP;
1575  }
1576
1577  // Create the load from the constant pool.
1578  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
1579  unsigned ResultReg = createResultReg(RC);
1580  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
1581                           MCPOffset, PICBase, OpFlag);
1582
1583  return ResultReg;
1584}
1585
1586unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
1587  // Fail on dynamic allocas. At this point, getRegForValue has already
1588  // checked its CSE maps, so if we're here trying to handle a dynamic
1589  // alloca, we're not going to succeed. X86SelectAddress has a
1590  // check for dynamic allocas, because it's called directly from
1591  // various places, but TargetMaterializeAlloca also needs a check
1592  // in order to avoid recursion between getRegForValue,
1593  // X86SelectAddrss, and TargetMaterializeAlloca.
1594  if (!StaticAllocaMap.count(C))
1595    return 0;
1596
1597  X86AddressMode AM;
1598  if (!X86SelectAddress(C, AM, false))
1599    return 0;
1600  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
1601  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
1602  unsigned ResultReg = createResultReg(RC);
1603  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1604  return ResultReg;
1605}
1606
namespace llvm {
  // createFastISel - Public entry point: construct an X86FastISel instance
  // for the given function and its fast-isel state maps (value, basic-block,
  // and alloca maps). The catch-all-instruction set is only threaded through
  // in asserts-enabled builds.
  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
                        MachineModuleInfo *mmi,
                        DwarfWriter *dw,
                        DenseMap<const Value *, unsigned> &vm,
                        DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                        DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                        , SmallSet<Instruction*, 8> &cil
#endif
                        ) {
    return new X86FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
                           , cil
#endif
                           );
  }
}
1625