ARMFastISel.cpp revision 1948910e3186d31bc0d213ecd0d7e87bb2c2760d
1//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the ARM-specific support for the FastISel class. Some
11// of the target-specific code is generated by tablegen in the file
12// ARMGenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMSubtarget.h"
21#include "ARMTargetMachine.h"
22#include "MCTargetDesc/ARMAddressingModes.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/CodeGen/Analysis.h"
25#include "llvm/CodeGen/FastISel.h"
26#include "llvm/CodeGen/FunctionLoweringInfo.h"
27#include "llvm/CodeGen/MachineConstantPool.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/MachineMemOperand.h"
31#include "llvm/CodeGen/MachineModuleInfo.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/IR/CallingConv.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/GlobalVariable.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Module.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/CallSite.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/GetElementPtrTypeIterator.h"
45#include "llvm/Support/MathExtras.h"
46#include "llvm/Target/TargetInstrInfo.h"
47#include "llvm/Target/TargetLowering.h"
48#include "llvm/Target/TargetMachine.h"
49#include "llvm/Target/TargetOptions.h"
50using namespace llvm;
51
52extern cl::opt<bool> EnableARMLongCalls;
53
54namespace {
55
56  // All possible address modes, plus some.
  typedef struct Address {
    // Discriminates the Base union below: the address base is either a
    // virtual/physical register or a stack frame index.
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // The base of the address, interpreted according to BaseType.
    union {
      unsigned Reg;
      int FI;
    } Base;

    // Constant byte displacement added to the base.
    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  } Address;
76
/// ARMFastISel - ARM-specific FastISel implementation.  Selects IR
/// instructions directly into MachineInstrs, falling back to SelectionDAG
/// when a construct is not handled.  Most helpers return a result register
/// number, with 0 meaning "could not materialize/select".
class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo)
    : FastISel(funcInfo, libInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
    // These override the generic FastEmitInst_* helpers so that ARM's
    // optional predicate/CC operands can be appended to every emitted
    // instruction (see AddOptionalDefs).
  private:
    unsigned FastEmitInst_(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC);
    unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);
    unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill,
                              unsigned Op2, bool Op2IsKill);
    unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             const ConstantFP *FPImm);
    unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill,
                              uint64_t Imm);
    unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            uint64_t Imm);
    unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             uint64_t Imm1, uint64_t Imm2);

    unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                        unsigned Op0, bool Op0IsKill,
                                        uint32_t Idx);

    // Backend specific FastISel code.
  private:
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
    virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                     const LoadInst *LI);
    virtual bool FastLowerArguments();
  private:
  #include "ARMGenFastISel.inc"

    // Instruction selection routines.
    // Each returns true if the IR instruction was fully selected.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                               unsigned Alignment);
    unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned ARMMaterializeInt(const Constant *C, MVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    unsigned getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(MVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              unsigned Flags, bool useAM3);
};
231
232} // end anonymous namespace
233
234#include "ARMGenCallingConv.inc"
235
236// DefinesOptionalPredicate - This is different from DefinesPredicate in that
237// we don't care about implicit defs here, just places we'll need to add a
238// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
239bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
240  if (!MI->hasOptionalDef())
241    return false;
242
243  // Look to see if our OptionalDef is defining CPSR or CCR.
244  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
245    const MachineOperand &MO = MI->getOperand(i);
246    if (!MO.isReg() || !MO.isDef()) continue;
247    if (MO.getReg() == ARM::CPSR)
248      *CPSR = true;
249  }
250  return true;
251}
252
253bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
254  const MCInstrDesc &MCID = MI->getDesc();
255
256  // If we're a thumb2 or not NEON function we were handled via isPredicable.
257  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
258       AFI->isThumb2Function())
259    return false;
260
261  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
262    if (MCID.OpInfo[i].isPredicate())
263      return true;
264
265  return false;
266}
267
268// If the machine is predicable go ahead and add the predicate operands, if
269// it needs default CC operands add those.
270// TODO: If we want to support thumb1 then we'll need to deal with optional
271// CPSR defs that need to be added before the remaining operands. See s_cc_out
272// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);  // Thumb1-style s-bit CPSR def.
    else
      AddDefaultCC(MIB);    // Plain condition-code register operand.
  }
  return MIB;
}
294
// Emit a no-operand instruction of the given opcode into a fresh register of
// class RC, appending ARM's default predicate/CC operands.
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}
303
// Emit a one-register-operand instruction, adding default predicate/CC
// operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                   TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
322
// Emit a two-register-operand instruction, adding default predicate/CC
// operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
344
// Emit a three-register-operand instruction, adding default predicate/CC
// operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addReg(Op2, Op2IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
369
// Emit a register+immediate instruction, adding default predicate/CC
// operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
391
// Emit a register+FP-immediate instruction, adding default predicate/CC
// operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
413
// Emit a register+register+immediate instruction, adding default
// predicate/CC operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
438
// Emit a single-immediate instruction, adding default predicate/CC
// operands.  Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
457
// Emit a two-immediate instruction, adding default predicate/CC operands.
// Returns the virtual register holding the result.
unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode has an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
477
// Extract subregister Idx of virtual register Op0 into a fresh register of
// the class for RetVT, using a subreg-qualified COPY.
unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}
490
491// TODO: Don't worry about 64-bit now, but when this is fixed remove the
492// checks from the various callers.
493unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
494  if (VT == MVT::f64) return 0;
495
496  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
497  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
498                          TII.get(ARM::VMOVSR), MoveReg)
499                  .addReg(SrcReg));
500  return MoveReg;
501}
502
503unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
504  if (VT == MVT::i64) return 0;
505
506  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
507  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
508                          TII.get(ARM::VMOVRS), MoveReg)
509                  .addReg(SrcReg));
510  return MoveReg;
511}
512
513// For double width floating point we need to materialize two constants
514// (the high and the low) into integer registers then use a move to get
515// the combined constant into an FP reg.
516unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
517  const APFloat Val = CFP->getValueAPF();
518  bool is64bit = VT == MVT::f64;
519
520  // This checks to see if we can use VFP3 instructions to materialize
521  // a constant, otherwise we have to go through the constant pool.
522  if (TLI.isFPImmLegal(Val, VT)) {
523    int Imm;
524    unsigned Opc;
525    if (is64bit) {
526      Imm = ARM_AM::getFP64Imm(Val);
527      Opc = ARM::FCONSTD;
528    } else {
529      Imm = ARM_AM::getFP32Imm(Val);
530      Opc = ARM::FCONSTS;
531    }
532    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
533    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
534                            DestReg)
535                    .addImm(Imm));
536    return DestReg;
537  }
538
539  // Require VFP2 for loading fp constants.
540  if (!Subtarget->hasVFP2()) return false;
541
542  // MachineConstantPool wants an explicit alignment.
543  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
544  if (Align == 0) {
545    // TODO: Figure out if this is correct.
546    Align = TD.getTypeAllocSize(CFP->getType());
547  }
548  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
549  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
550  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
551
552  // The extra reg is for addrmode5.
553  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
554                          DestReg)
555                  .addConstantPoolIndex(Idx)
556                  .addReg(0));
557  return DestReg;
558}
559
560unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
561
562  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
563    return false;
564
565  // If we can do this in a single instruction without a constant pool entry
566  // do so now.
567  const ConstantInt *CI = cast<ConstantInt>(C);
568  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
569    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
570    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
571      &ARM::GPRRegClass;
572    unsigned ImmReg = createResultReg(RC);
573    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
574                            TII.get(Opc), ImmReg)
575                    .addImm(CI->getZExtValue()));
576    return ImmReg;
577  }
578
579  // Use MVN to emit negative constants.
580  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
581    unsigned Imm = (unsigned)~(CI->getSExtValue());
582    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
583      (ARM_AM::getSOImmVal(Imm) != -1);
584    if (UseImm) {
585      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
586      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
587      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
588                              TII.get(Opc), ImmReg)
589                      .addImm(Imm));
590      return ImmReg;
591    }
592  }
593
594  // Load from constant pool.  For now 32-bit only.
595  if (VT != MVT::i32)
596    return false;
597
598  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
599
600  // MachineConstantPool wants an explicit alignment.
601  unsigned Align = TD.getPrefTypeAlignment(C->getType());
602  if (Align == 0) {
603    // TODO: Figure out if this is correct.
604    Align = TD.getTypeAllocSize(C->getType());
605  }
606  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
607
608  if (isThumb2)
609    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
610                            TII.get(ARM::t2LDRpci), DestReg)
611                    .addConstantPoolIndex(Idx));
612  else
613    // The extra immediate is for addrmode2.
614    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
615                            TII.get(ARM::LDRcp), DestReg)
616                    .addConstantPoolIndex(Idx)
617                    .addImm(0));
618
619  return DestReg;
620}
621
// Materialize the address of global GV into a register, using movw/movt, a
// PIC ELF sequence, or a constant-pool load depending on subtarget and
// relocation model.  Returns the register, or 0 on failure.
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();
  bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
  const TargetRegisterClass *RC = isThumb2 ?
    (const TargetRegisterClass*)&ARM::rGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);

  // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0;

  // Use movw+movt when possible, it avoids constant pool entries.
  // Darwin targets don't support movt with Reloc::Static, see
  // ARMTargetLowering::LowerGlobalAddressDarwin.  Other targets only support
  // static movt relocations.
  if (Subtarget->useMovt() &&
      Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
    unsigned Opc;
    switch (RelocM) {
    case Reloc::PIC_:
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
      break;
    case Reloc::DynamicNoPIC:
      Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
      break;
    default:
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
      break;
    }
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg).addGlobalAddress(GV));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = TD.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = TD.getTypeAllocSize(GV->getType());
    }

    if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
      return ARMLowerPICELF(GV, Align, VT);

    // Grab index.
    // PC adjustment accounts for the pipeline offset of the PC-relative
    // fixup: 4 bytes in Thumb mode, 8 in ARM mode.
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
      (Subtarget->isThumb() ? 4 : 8);
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addConstantPoolIndex(Idx);
      if (RelocM == Reloc::PIC_)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                    DestReg)
        .addConstantPoolIndex(Idx)
        .addImm(0);
      AddOptionalDefs(MIB);

      if (RelocM == Reloc::PIC_) {
        // Add (or, for indirect symbols, load through) the PIC base to form
        // the final address.
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          DL, TII.get(Opc), NewDestReg)
                                  .addReg(DestReg)
                                  .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  // Indirect symbols hold a pointer to the real address; load through it.
  if (IsIndirect) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
728
729unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
730  EVT CEVT = TLI.getValueType(C->getType(), true);
731
732  // Only handle simple types.
733  if (!CEVT.isSimple()) return 0;
734  MVT VT = CEVT.getSimpleVT();
735
736  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
737    return ARMMaterializeFP(CFP, VT);
738  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
739    return ARMMaterializeGV(GV, VT);
740  else if (isa<ConstantInt>(C))
741    return ARMMaterializeInt(C, VT);
742
743  return 0;
744}
745
746// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
747
748unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
749  // Don't handle dynamic allocas.
750  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
751
752  MVT VT;
753  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
754
755  DenseMap<const AllocaInst*, int>::iterator SI =
756    FuncInfo.StaticAllocaMap.find(AI);
757
758  // This will get lowered later into the correct offsets and registers
759  // via rewriteXFrameIndex.
760  if (SI != FuncInfo.StaticAllocaMap.end()) {
761    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
762    unsigned ResultReg = createResultReg(RC);
763    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
764    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
765                            TII.get(Opc), ResultReg)
766                            .addFrameIndex(SI->second)
767                            .addImm(0));
768    return ResultReg;
769  }
770
771  return 0;
772}
773
774bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
775  EVT evt = TLI.getValueType(Ty, true);
776
777  // Only handle simple types.
778  if (evt == MVT::Other || !evt.isSimple()) return false;
779  VT = evt.getSimpleVT();
780
781  // Handle all legal types, i.e. a register that will directly hold this
782  // value.
783  return TLI.isTypeLegal(VT);
784}
785
786bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
787  if (isTypeLegal(Ty, VT)) return true;
788
789  // If this is a type than can be sign or zero-extended to a basic operation
790  // go ahead and accept it now.
791  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
792    return true;
793
794  return false;
795}
796
797// Computes the address to get to an object.
// Computes the address to get to an object: folds bitcasts, no-op int/ptr
// conversions, constant GEP offsets and static allocas into Addr, and falls
// back to materializing Obj into a plain base register. Returns false only
// when no base register can be produced.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  // UserOp1 is a sentinel opcode: it hits the switch's default and skips
  // all the folding cases below.
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    // (The static_cast is only used to form a map key for count(); it is
    // never dereferenced, so it is harmless when Obj isn't an AllocaInst.)
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    // Constant expressions (e.g. a GEP of a global) are folded the same way
    // as the corresponding instructions.
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      // Accumulate all constant GEP indices into TmpOffset; on any index we
      // can't fold, restore Addr and fall through to the register fallback.
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          // Struct index: field offset comes straight from the layout.
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          // Array/pointer index: scale the index by the element size S.
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      // Static allocas become a frame-index base; the actual offsets and
      // registers are resolved later.
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
909
// Rewrites Addr so that its offset fits the immediate field of the
// load/store addressing mode chosen for VT. When the offset doesn't fit,
// the base+offset sum is materialized into a register and the offset is
// reset to zero.
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        // (The mask check rejects both too-large and negative offsets.)
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ?
      (const TargetRegisterClass*)&ARM::tGPRRegClass :
      (const TargetRegisterClass*)&ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    // ResultReg = FrameIndex + 0; rewritten to the real SP offset later.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(Addr.Base.FI)
                            .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}
962
963void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
964                                       const MachineInstrBuilder &MIB,
965                                       unsigned Flags, bool useAM3) {
966  // addrmode5 output depends on the selection dag addressing dividing the
967  // offset by 4 that it then later multiplies. Do this here as well.
968  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
969    Addr.Offset /= 4;
970
971  // Frame base works a bit differently. Handle it separately.
972  if (Addr.BaseType == Address::FrameIndexBase) {
973    int FI = Addr.Base.FI;
974    int Offset = Addr.Offset;
975    MachineMemOperand *MMO =
976          FuncInfo.MF->getMachineMemOperand(
977                                  MachinePointerInfo::getFixedStack(FI, Offset),
978                                  Flags,
979                                  MFI.getObjectSize(FI),
980                                  MFI.getObjectAlignment(FI));
981    // Now add the rest of the operands.
982    MIB.addFrameIndex(FI);
983
984    // ARM halfword load/stores and signed byte loads need an additional
985    // operand.
986    if (useAM3) {
987      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
988      MIB.addReg(0);
989      MIB.addImm(Imm);
990    } else {
991      MIB.addImm(Addr.Offset);
992    }
993    MIB.addMemOperand(MMO);
994  } else {
995    // Now add the rest of the operands.
996    MIB.addReg(Addr.Base.Reg);
997
998    // ARM halfword load/stores and signed byte loads need an additional
999    // operand.
1000    if (useAM3) {
1001      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
1002      MIB.addReg(0);
1003      MIB.addImm(Imm);
1004    } else {
1005      MIB.addImm(Addr.Offset);
1006    }
1007  }
1008  AddOptionalDefs(MIB);
1009}
1010
// Emits a load of VT from Addr. ResultReg receives the loaded value; it is
// allocated here unless allocReg is false, in which case the caller must
// have supplied a virtual register. Returns false when the type/alignment
// combination isn't supported.
bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  unsigned Opc;
  // useAM3: halfword/signed-byte addressing mode (extra operand, imm8).
  bool useAM3 = false;
  // needVMOV: an unaligned f32 is loaded through a GPR, then moved to VFP.
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        // Small negative offsets use the imm8 form when available.
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          // Signed byte load has no i12 form in ARM mode; use addrmode3.
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        // ARM-mode halfword loads always use addrmode3.
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        // Load the bits as an i32 into a GPR, then VMOV them to an S-reg.
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned loads need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  // Virtual registers are numbered well above the physical-register range,
  // so any small value here means the caller forgot to allocate one.
  assert (ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to an regular
  // load.  Now we must move from the GRP to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
1112
1113bool ARMFastISel::SelectLoad(const Instruction *I) {
1114  // Atomic loads need special handling.
1115  if (cast<LoadInst>(I)->isAtomic())
1116    return false;
1117
1118  // Verify we have a legal type before going any further.
1119  MVT VT;
1120  if (!isLoadTypeLegal(I->getType(), VT))
1121    return false;
1122
1123  // See if we can handle this address.
1124  Address Addr;
1125  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
1126
1127  unsigned ResultReg;
1128  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
1129    return false;
1130  UpdateValueMap(I, ResultReg);
1131  return true;
1132}
1133
// Emits a store of SrcReg (holding a value of type VT) to Addr. Returns
// false when the type/alignment combination isn't supported. i1 values are
// masked to a single bit before storing as a byte.
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  // useAM3: halfword addressing mode (extra operand, imm8 offset).
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      // Mask to the low bit (AND #1), then store it as a byte via the i8
      // case below.
      unsigned Res = createResultReg(isThumb2 ?
        (const TargetRegisterClass*)&ARM::tGPRRegClass :
        (const TargetRegisterClass*)&ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      if (isThumb2) {
        // Small negative offsets use the imm8 form when available.
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        // ARM-mode halfword stores always use addrmode3.
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        // Move the bits into a GPR (VMOVRS) and store them as an i32.
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned stores need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
          return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}
1223
1224bool ARMFastISel::SelectStore(const Instruction *I) {
1225  Value *Op0 = I->getOperand(0);
1226  unsigned SrcReg = 0;
1227
1228  // Atomic stores need special handling.
1229  if (cast<StoreInst>(I)->isAtomic())
1230    return false;
1231
1232  // Verify we have a legal type before going any further.
1233  MVT VT;
1234  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
1235    return false;
1236
1237  // Get the value to be stored into a register.
1238  SrcReg = getRegForValue(Op0);
1239  if (SrcReg == 0) return false;
1240
1241  // See if we can handle this address.
1242  Address Addr;
1243  if (!ARMComputeAddress(I->getOperand(1), Addr))
1244    return false;
1245
1246  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
1247    return false;
1248  return true;
1249}
1250
1251static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1252  switch (Pred) {
1253    // Needs two compares...
1254    case CmpInst::FCMP_ONE:
1255    case CmpInst::FCMP_UEQ:
1256    default:
1257      // AL is our "false" for now. The other two need more compares.
1258      return ARMCC::AL;
1259    case CmpInst::ICMP_EQ:
1260    case CmpInst::FCMP_OEQ:
1261      return ARMCC::EQ;
1262    case CmpInst::ICMP_SGT:
1263    case CmpInst::FCMP_OGT:
1264      return ARMCC::GT;
1265    case CmpInst::ICMP_SGE:
1266    case CmpInst::FCMP_OGE:
1267      return ARMCC::GE;
1268    case CmpInst::ICMP_UGT:
1269    case CmpInst::FCMP_UGT:
1270      return ARMCC::HI;
1271    case CmpInst::FCMP_OLT:
1272      return ARMCC::MI;
1273    case CmpInst::ICMP_ULE:
1274    case CmpInst::FCMP_OLE:
1275      return ARMCC::LS;
1276    case CmpInst::FCMP_ORD:
1277      return ARMCC::VC;
1278    case CmpInst::FCMP_UNO:
1279      return ARMCC::VS;
1280    case CmpInst::FCMP_UGE:
1281      return ARMCC::PL;
1282    case CmpInst::ICMP_SLT:
1283    case CmpInst::FCMP_ULT:
1284      return ARMCC::LT;
1285    case CmpInst::ICMP_SLE:
1286    case CmpInst::FCMP_ULE:
1287      return ARMCC::LE;
1288    case CmpInst::FCMP_UNE:
1289    case CmpInst::ICMP_NE:
1290      return ARMCC::NE;
1291    case CmpInst::ICMP_UGE:
1292      return ARMCC::HS;
1293    case CmpInst::ICMP_ULT:
1294      return ARMCC::LO;
1295  }
1296}
1297
// Lowers a conditional or constant branch. Three fast paths are tried:
// (1) fuse with a same-block, single-use compare; (2) fuse with a
// same-block, single-use truncate by testing its low bit; (3) fold a
// constant condition into an unconditional branch. Otherwise the i1
// condition register is tested directly.
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    // Only fuse when the compare is in this block and this branch is its
    // sole user, so the flags are still valid.
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // A truncate to i1 feeding the branch: test bit 0 of the source
    // directly instead of materializing the truncate.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      // Invert the sense of the test if the true block falls through.
      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: emit a single unconditional branch to the taken
    // side.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We musn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  // Invert the sense of the test if the true block falls through.
  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}
1393
1394bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1395  unsigned AddrReg = getRegForValue(I->getOperand(0));
1396  if (AddrReg == 0) return false;
1397
1398  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1399  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
1400                  .addReg(AddrReg));
1401
1402  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1403  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
1404    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
1405
1406  return true;
1407}
1408
// Emits a compare of Src1Value against Src2Value, leaving the result in
// CPSR (via FMSTAT for floating point). isZExt selects zero- vs
// sign-extension for sub-i32 operands and how an integer constant RHS is
// read. Returns false if this compare can't be emitted here.
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(Ty, true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // then a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        // Negative immediates are emitted as CMN with the negated value.
        isNegativeImm = true;
        Imm = -Imm;
      }
      // The (possibly negated) immediate must be encodable as a modified
      // immediate for the target's CMP/CMN form.
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    // Only +0.0 has a dedicated compare-with-zero instruction (VCMPEZ).
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
    // Intentional fall-through.
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
      }
      break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0) return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0) return false;
    }
  }

  if (!UseImm) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(CmpOpc))
                    .addReg(SrcReg1).addReg(SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
      .addReg(SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));
  return true;
}
1523
1524bool ARMFastISel::SelectCmp(const Instruction *I) {
1525  const CmpInst *CI = cast<CmpInst>(I);
1526
1527  // Get the compare predicate.
1528  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1529
1530  // We may not handle every CC for now.
1531  if (ARMPred == ARMCC::AL) return false;
1532
1533  // Emit the compare.
1534  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1535    return false;
1536
1537  // Now set a register based on the comparison. Explicitly set the predicates
1538  // here.
1539  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1540  const TargetRegisterClass *RC = isThumb2 ?
1541    (const TargetRegisterClass*)&ARM::rGPRRegClass :
1542    (const TargetRegisterClass*)&ARM::GPRRegClass;
1543  unsigned DestReg = createResultReg(RC);
1544  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1545  unsigned ZeroReg = TargetMaterializeConstant(Zero);
1546  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1547  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
1548          .addReg(ZeroReg).addImm(1)
1549          .addImm(ARMPred).addReg(ARM::CPSR);
1550
1551  UpdateValueMap(I, DestReg);
1552  return true;
1553}
1554
1555bool ARMFastISel::SelectFPExt(const Instruction *I) {
1556  // Make sure we have VFP and that we're extending float to double.
1557  if (!Subtarget->hasVFP2()) return false;
1558
1559  Value *V = I->getOperand(0);
1560  if (!I->getType()->isDoubleTy() ||
1561      !V->getType()->isFloatTy()) return false;
1562
1563  unsigned Op = getRegForValue(V);
1564  if (Op == 0) return false;
1565
1566  unsigned Result = createResultReg(&ARM::DPRRegClass);
1567  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1568                          TII.get(ARM::VCVTDS), Result)
1569                  .addReg(Op));
1570  UpdateValueMap(I, Result);
1571  return true;
1572}
1573
1574bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1575  // Make sure we have VFP and that we're truncating double to float.
1576  if (!Subtarget->hasVFP2()) return false;
1577
1578  Value *V = I->getOperand(0);
1579  if (!(I->getType()->isFloatTy() &&
1580        V->getType()->isDoubleTy())) return false;
1581
1582  unsigned Op = getRegForValue(V);
1583  if (Op == 0) return false;
1584
1585  unsigned Result = createResultReg(&ARM::SPRRegClass);
1586  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1587                          TII.get(ARM::VCVTSD), Result)
1588                  .addReg(Op));
1589  UpdateValueMap(I, Result);
1590  return true;
1591}
1592
// Lowers sitofp/uitofp: the integer source is (sign- or zero-) extended to
// i32 if needed, moved into an S-register, and converted in place with the
// VFP int-to-float instructions.
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  // Only i8/i16/i32 sources are supported.
  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0) return false;

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
                                       /*isZExt*/!isSigned);
    if (SrcReg == 0) return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  if (FP == 0) return false;

  // Pick signed/unsigned conversion to single or double precision.
  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}
1637
1638bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
1639  // Make sure we have VFP.
1640  if (!Subtarget->hasVFP2()) return false;
1641
1642  MVT DstVT;
1643  Type *RetTy = I->getType();
1644  if (!isTypeLegal(RetTy, DstVT))
1645    return false;
1646
1647  unsigned Op = getRegForValue(I->getOperand(0));
1648  if (Op == 0) return false;
1649
1650  unsigned Opc;
1651  Type *OpTy = I->getOperand(0)->getType();
1652  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1653  else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1654  else return false;
1655
1656  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1657  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1658  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1659                          ResultReg)
1660                  .addReg(Op));
1661
1662  // This result needs to be in an integer register, but the conversion only
1663  // takes place in fp-regs.
1664  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1665  if (IntReg == 0) return false;
1666
1667  UpdateValueMap(I, IntReg);
1668  return true;
1669}
1670
// Lower an i32 'select' as a compare of the condition against zero followed
// by a predicated move.  When the false-operand is a constant that fits an
// ARM/Thumb2 modified-immediate encoding (possibly via MVN of its
// complement), it is folded into the conditional move as an immediate.
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
    assert (VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      // Negative constants are materialized with MVN, which inverts its
      // operand, so encode the bitwise complement instead.
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
  }

  // Only materialize the false-operand in a register if we couldn't fold it
  // as an immediate.
  unsigned Op2Reg = 0;
  if (!UseImm) {
    Op2Reg = getRegForValue(I->getOperand(2));
    if (Op2Reg == 0) return false;
  }

  // Set the flags by comparing the condition value against zero.
  unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(0));

  unsigned MovCCOpc;
  const TargetRegisterClass *RC;
  if (!UseImm) {
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
    if (!isNegativeImm)
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    else
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  }
  unsigned ResultReg = createResultReg(RC);
  // Register form: result defaults to Op2 (false value) and is overwritten
  // with Op1 when the condition is non-zero (NE).  Immediate form: result
  // defaults to Op1 (true value) and the immediate is substituted when the
  // condition is zero (EQ).
  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}
1731
1732bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1733  MVT VT;
1734  Type *Ty = I->getType();
1735  if (!isTypeLegal(Ty, VT))
1736    return false;
1737
1738  // If we have integer div support we should have selected this automagically.
1739  // In case we have a real miss go ahead and return false and we'll pick
1740  // it up later.
1741  if (Subtarget->hasDivide()) return false;
1742
1743  // Otherwise emit a libcall.
1744  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1745  if (VT == MVT::i8)
1746    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1747  else if (VT == MVT::i16)
1748    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1749  else if (VT == MVT::i32)
1750    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1751  else if (VT == MVT::i64)
1752    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1753  else if (VT == MVT::i128)
1754    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1755  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1756
1757  return ARMEmitLibcall(I, LC);
1758}
1759
1760bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1761  MVT VT;
1762  Type *Ty = I->getType();
1763  if (!isTypeLegal(Ty, VT))
1764    return false;
1765
1766  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1767  if (VT == MVT::i8)
1768    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1769  else if (VT == MVT::i16)
1770    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1771  else if (VT == MVT::i32)
1772    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1773  else if (VT == MVT::i64)
1774    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1775  else if (VT == MVT::i128)
1776    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1777  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1778
1779  return ARMEmitLibcall(I, LC);
1780}
1781
1782bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1783  EVT DestVT  = TLI.getValueType(I->getType(), true);
1784
1785  // We can get here in the case when we have a binary operation on a non-legal
1786  // type and the target independent selector doesn't know how to handle it.
1787  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1788    return false;
1789
1790  unsigned Opc;
1791  switch (ISDOpcode) {
1792    default: return false;
1793    case ISD::ADD:
1794      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1795      break;
1796    case ISD::OR:
1797      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1798      break;
1799    case ISD::SUB:
1800      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1801      break;
1802  }
1803
1804  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1805  if (SrcReg1 == 0) return false;
1806
1807  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
1808  // in the instruction, rather then materializing the value in a register.
1809  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1810  if (SrcReg2 == 0) return false;
1811
1812  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
1813  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1814                          TII.get(Opc), ResultReg)
1815                  .addReg(SrcReg1).addReg(SrcReg2));
1816  UpdateValueMap(I, ResultReg);
1817  return true;
1818}
1819
1820bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
1821  EVT FPVT = TLI.getValueType(I->getType(), true);
1822  if (!FPVT.isSimple()) return false;
1823  MVT VT = FPVT.getSimpleVT();
1824
1825  // We can get here in the case when we want to use NEON for our fp
1826  // operations, but can't figure out how to. Just use the vfp instructions
1827  // if we have them.
1828  // FIXME: It'd be nice to use NEON instructions.
1829  Type *Ty = I->getType();
1830  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1831  if (isFloat && !Subtarget->hasVFP2())
1832    return false;
1833
1834  unsigned Opc;
1835  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1836  switch (ISDOpcode) {
1837    default: return false;
1838    case ISD::FADD:
1839      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1840      break;
1841    case ISD::FSUB:
1842      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1843      break;
1844    case ISD::FMUL:
1845      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1846      break;
1847  }
1848  unsigned Op1 = getRegForValue(I->getOperand(0));
1849  if (Op1 == 0) return false;
1850
1851  unsigned Op2 = getRegForValue(I->getOperand(1));
1852  if (Op2 == 0) return false;
1853
1854  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
1855  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1856                          TII.get(Opc), ResultReg)
1857                  .addReg(Op1).addReg(Op2));
1858  UpdateValueMap(I, ResultReg);
1859  return true;
1860}
1861
1862// Call Handling Code
1863
1864// This is largely taken directly from CCAssignFnForNode
1865// TODO: We may not support all of this.
// Pick the CCAssignFn used to assign arguments (Return=false) or results
// (Return=true) for a call with the given IR calling convention.  The Fast
// and ARM_AAPCS_VFP cases deliberately fall through to softer variants when
// VFP is unavailable or the call is variadic.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::GHC:
    if (Return)
      llvm_unreachable("Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  }
}
1906
// Lower the outgoing arguments of a call.  First runs the calling convention
// over ArgVTs/ArgFlags and verifies that every resulting assignment is one
// fast-isel can honor (bailing out before any code is emitted otherwise),
// then emits CALLSEQ_START and moves each argument into its assigned
// register or stack slot.  Physical registers used for argument passing are
// appended to RegArgs; the stack adjustment is returned in NumBytes.
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
                             CCAssignFnForCall(CC, false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      // Note: the ++i here consumes the second half of an f64 that was split
      // across a register pair.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2())
          return false;
        break;
      }
    }
  }

  // At the point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
        assert (Arg != 0 && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
        // Intentional fall-through.  Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert (Arg != 0 && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // An f64 split across two GPRs: VMOVRRD defines both halves, and the
      // second location is consumed here (mirrors the validation loop above).
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}
2047
// Finish a call: emit CALLSEQ_END and copy any return value out of its
// assigned physical register(s) into fresh virtual registers.  Physical
// registers consumed for the return value are recorded in UsedRegs so the
// caller can mark the call's remaining physreg defs dead.
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      // Sub-i32 results still come back in a full 32-bit register.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}
2102
// Lower a 'ret' instruction.  Handles at most a single register-located
// return value, widening sub-i32 integers per the signext/zeroext
// attributes, copying the value into its ABI-assigned physreg, and emitting
// a BX_RET/tBX_RET with the return registers as implicit uses.
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
                                                 F.isVarArg()));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext.  Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(RetOpc));
  AddOptionalDefs(MIB);
  // Mark the return registers as implicitly used so they stay live.
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}
2183
2184unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2185  if (UseReg)
2186    return isThumb2 ? ARM::tBLXr : ARM::BLX;
2187  else
2188    return isThumb2 ? ARM::tBL : ARM::BL;
2189}
2190
2191unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
2192  GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
2193                                       GlobalValue::ExternalLinkage, 0, Name);
2194  EVT LCREVT = TLI.getValueType(GV->getType());
2195  if (!LCREVT.isSimple()) return 0;
2196  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
2197}
2198
2199// A quick function that will emit a call for a named libcall in F with the
2200// vector of passed arguments for the Instruction in I. We can assume that we
2201// can emit a call for any libcall we can produce. This is an abridged version
2202// of the full call infrastructure since we won't need to worry about things
2203// like computed function pointers or strange arguments at call sites.
2204// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2205// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  // The libcall's arguments are exactly the instruction's operands.
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, false))
    return false;

  // Long calls go through a register rather than a direct branch.
  unsigned CalleeReg = 0;
  if (EnableARMLongCalls) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    AddDefaultPred(MIB);
  if (EnableARMLongCalls)
    MIB.addReg(CalleeReg);
  else
    MIB.addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i], RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
2296
// Lower a call instruction.  When IntrMemName is non-null this is a memory
// intrinsic (memcpy/memmove/memset) being lowered as a libcall to that
// symbol, and the intrinsic's trailing alignment/volatile operands are
// skipped.  Only "simple" calls are handled; anything else is left to the
// SelectionDAG path.
bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = 0) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (CI->isTailCall()) return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8  && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CS.arg_size();
  Args.reserve(arg_size);
  ArgRegs.reserve(arg_size);
  ArgVTs.reserve(arg_size);
  ArgFlags.reserve(arg_size);
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last two arguments, which shouldn't be passed to the underlying function.
    if (IntrMemName && e-i <= 2)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  // Indirect callees (or long-call mode) need the target in a register.
  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || EnableARMLongCalls) UseReg = true;

  unsigned CalleeReg = 0;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(IntrMemName);
    else
      CalleeReg = getRegForValue(Callee);

    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if(isThumb2)
    AddDefaultPred(MIB);
  if (UseReg)
    MIB.addReg(CalleeReg);
  else if (!IntrMemName)
    MIB.addGlobalAddress(GV, 0, 0);
  else
    MIB.addExternalSymbol(IntrMemName, 0);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i], RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
2441
2442bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2443  return Len <= 16;
2444}
2445
// Expand a small memcpy inline as a sequence of load/store pairs, choosing
// the widest access (i32/i16/i8) compatible with the remaining length and
// the given alignment (0 = unknown/natural).  Returns false without emitting
// anything when Len exceeds the inline threshold.
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
                                        uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  while (Len) {
    MVT VT;
    if (!Alignment || Alignment >= 4) {
      // Aligned (or alignment unknown): use the widest type that fits.
      if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        assert (Len == 1 && "Expected a length of 1!");
        VT = MVT::i8;
      }
    } else {
      // Bound based on alignment.
      if (Len >= 2 && Alignment == 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    }

    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert (RV == true && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert (RV == true && "Should be able to handle this store.");
    (void)RV;

    // Advance both addresses by the number of bytes just copied.
    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}
2488
/// Select a call to one of the intrinsics fast-isel lowers directly
/// (frameaddress, memcpy/memmove, memset, trap).  Returns false for any
/// other intrinsic so SelectionDAG can handle it.
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
    MFI->setFrameAddressIsTaken(true);

    // Pick the load opcode and result register class for the current mode.
    unsigned LdrOpc;
    const TargetRegisterClass *RC;
    if (isThumb2) {
      LdrOpc =  ARM::t2LDRi12;
      RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
    } else {
      LdrOpc =  ARM::LDRi12;
      RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
    }

    const ARMBaseRegisterInfo *RegInfo =
          static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address
    // ldr r0 [fp]
    // ldr r0 [r0]
    // ldr r0 [r0]
    // ...
    // One load per requested depth level; depth 0 emits no loads and
    // returns the frame pointer itself.
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(LdrOpc), DestReg)
                      .addReg(SrcReg).addImm(0));
      SrcReg = DestReg;
    }
    UpdateValueMap(&I, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        unsigned Alignment = MTI.getAlignment();
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // Only lower to a call when the length is a plain i32.
    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    // Address spaces above 255 are not handled here.
    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    // Only lower to a call when the length is a plain i32.
    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    // Address spaces above 255 are not handled here.
    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    // NaCl uses its own trap encoding; everything else gets ARM::TRAP.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(
      Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
    return true;
  }
  }
}
2584
2585bool ARMFastISel::SelectTrunc(const Instruction *I) {
2586  // The high bits for a type smaller than the register size are assumed to be
2587  // undefined.
2588  Value *Op = I->getOperand(0);
2589
2590  EVT SrcVT, DestVT;
2591  SrcVT = TLI.getValueType(Op->getType(), true);
2592  DestVT = TLI.getValueType(I->getType(), true);
2593
2594  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2595    return false;
2596  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2597    return false;
2598
2599  unsigned SrcReg = getRegForValue(Op);
2600  if (!SrcReg) return false;
2601
2602  // Because the high bits are undefined, a truncate doesn't generate
2603  // any code.
2604  UpdateValueMap(I, SrcReg);
2605  return true;
2606}
2607
/// Emit a sign- or zero-extension of SrcReg from SrcVT to DestVT and
/// return the virtual register holding the result, or 0 when this type
/// combination is not handled here.
unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                    bool isZExt) {
  // Only extensions of i1/i8/i16 into i8/i16/i32 are supported.
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
    return 0;

  // Table of which combinations can be emitted as a single instruction,
  // and which will require two.
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
    //            ARM                     Thumb
    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
    //    ext:     s  z      s  z          s  z      s  z
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  };

  // Target registers for:
  //  - For ARM can never be PC.
  //  - For 16-bit Thumb are restricted to lower 8 registers.
  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
  static const TargetRegisterClass *RCTbl[2][2] = {
    // Instructions: Two                     Single
    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
  };

  // Table governing the instruction(s) to be emitted.
  // Indexed by [isSingleInstr][isThumb2][Bitness]; for the two-instruction
  // case the first instruction is a left shift, the second comes from here.
  static const struct {
    // First entry for each of the following is sext, second zext.
    uint16_t Opc[2];
    uint8_t Imm[2];   // All instructions have either a shift or a mask.
    uint8_t hasS[2];  // Some instructions have an S bit, always set it to 0.
  } OpcTbl[2][2][3] = {
    { // Two instructions (first is left shift, second is in this table).
      { // ARM
        /*  1 */ { { ARM::ASRi,   ARM::LSRi    }, {  31,  31 }, { 1, 1 } },
        /*  8 */ { { ARM::ASRi,   ARM::LSRi    }, {  24,  24 }, { 1, 1 } },
        /* 16 */ { { ARM::ASRi,   ARM::LSRi    }, {  16,  16 }, { 1, 1 } }
      },
      { // Thumb
        /*  1 */ { { ARM::tASRri, ARM::tLSRri  }, {  31,  31 }, { 0, 0 } },
        /*  8 */ { { ARM::tASRri, ARM::tLSRri  }, {  24,  24 }, { 0, 0 } },
        /* 16 */ { { ARM::tASRri, ARM::tLSRri  }, {  16,  16 }, { 0, 0 } }
      }
    },
    { // Single instruction.
      { // ARM
        /*  1 */ { { ARM::KILL,   ARM::ANDri   }, {   0,   1 }, { 0, 1 } },
        /*  8 */ { { ARM::SXTB,   ARM::ANDri   }, {   0, 255 }, { 0, 1 } },
        /* 16 */ { { ARM::SXTH,   ARM::UXTH    }, {   0,   0 }, { 0, 0 } }
      },
      { // Thumb
        /*  1 */ { { ARM::KILL,   ARM::t2ANDri }, {   0,   1 }, { 0, 1 } },
        /*  8 */ { { ARM::t2SXTB, ARM::t2ANDri }, {   0, 255 }, { 0, 1 } },
        /* 16 */ { { ARM::t2SXTH, ARM::t2UXTH  }, {   0,   0 }, { 0, 0 } }
      }
    }
  };

  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DestVT.getSizeInBits();
  (void) DestBits;
  assert((SrcBits < DestBits) && "can only extend to larger types");
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
         "other sizes unimplemented");
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
         "other sizes unimplemented");

  bool hasV6Ops = Subtarget->hasV6Ops();
  unsigned Bitness = countTrailingZeros(SrcBits) >> 1;  // {1,8,16}=>{0,1,2}
  assert((Bitness < 3) && "sanity-check table bounds");

  // Look up everything the emission loop below needs for this combination.
  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  unsigned Opc = OpcTbl[isSingleInstr][isThumb2][Bitness].Opc[isZExt];
  // ARM::KILL marks slots isSingleInstrTbl never selects (sext of i1 as a
  // single instruction), so it must not reach this point.
  assert(ARM::KILL != Opc && "Invalid table entry");
  unsigned Imm = OpcTbl[isSingleInstr][isThumb2][Bitness].Imm[isZExt];
  unsigned hasS = OpcTbl[isSingleInstr][isThumb2][Bitness].hasS[isZExt];

  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  bool setsCPSR = &ARM::tGPRRegClass == RC;
  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::LSLi;
  unsigned ResultReg;

  // Either one or two instructions are emitted.
  // They're always of the form:
  //   dst = in OP imm
  // CPSR is set only by 16-bit Thumb instructions.
  // Predicate, if any, is AL.
  // S bit, if available, is always 0.
  // When two are emitted the first's result will feed as the second's input,
  // that value is then dead.
  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
    ResultReg = createResultReg(RC);
    unsigned Opcode = ((0 == Instr) && !isSingleInstr) ? LSLOpc : Opc;
    bool isKill = 1 == Instr;
    MachineInstrBuilder MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
    if (setsCPSR)
      MIB.addReg(ARM::CPSR, RegState::Define);
    AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(Imm));
    if (hasS)
      AddDefaultCC(MIB);
    // Second instruction consumes the first's result.
    SrcReg = ResultReg;
  }

  return ResultReg;
}
2720
2721bool ARMFastISel::SelectIntExt(const Instruction *I) {
2722  // On ARM, in general, integer casts don't involve legal types; this code
2723  // handles promotable integers.
2724  Type *DestTy = I->getType();
2725  Value *Src = I->getOperand(0);
2726  Type *SrcTy = Src->getType();
2727
2728  bool isZExt = isa<ZExtInst>(I);
2729  unsigned SrcReg = getRegForValue(Src);
2730  if (!SrcReg) return false;
2731
2732  EVT SrcEVT, DestEVT;
2733  SrcEVT = TLI.getValueType(SrcTy, true);
2734  DestEVT = TLI.getValueType(DestTy, true);
2735  if (!SrcEVT.isSimple()) return false;
2736  if (!DestEVT.isSimple()) return false;
2737
2738  MVT SrcVT = SrcEVT.getSimpleVT();
2739  MVT DestVT = DestEVT.getSimpleVT();
2740  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2741  if (ResultReg == 0) return false;
2742  UpdateValueMap(I, ResultReg);
2743  return true;
2744}
2745
2746bool ARMFastISel::SelectShift(const Instruction *I,
2747                              ARM_AM::ShiftOpc ShiftTy) {
2748  // We handle thumb2 mode by target independent selector
2749  // or SelectionDAG ISel.
2750  if (isThumb2)
2751    return false;
2752
2753  // Only handle i32 now.
2754  EVT DestVT = TLI.getValueType(I->getType(), true);
2755  if (DestVT != MVT::i32)
2756    return false;
2757
2758  unsigned Opc = ARM::MOVsr;
2759  unsigned ShiftImm;
2760  Value *Src2Value = I->getOperand(1);
2761  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
2762    ShiftImm = CI->getZExtValue();
2763
2764    // Fall back to selection DAG isel if the shift amount
2765    // is zero or greater than the width of the value type.
2766    if (ShiftImm == 0 || ShiftImm >=32)
2767      return false;
2768
2769    Opc = ARM::MOVsi;
2770  }
2771
2772  Value *Src1Value = I->getOperand(0);
2773  unsigned Reg1 = getRegForValue(Src1Value);
2774  if (Reg1 == 0) return false;
2775
2776  unsigned Reg2 = 0;
2777  if (Opc == ARM::MOVsr) {
2778    Reg2 = getRegForValue(Src2Value);
2779    if (Reg2 == 0) return false;
2780  }
2781
2782  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
2783  if(ResultReg == 0) return false;
2784
2785  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2786                                    TII.get(Opc), ResultReg)
2787                            .addReg(Reg1);
2788
2789  if (Opc == ARM::MOVsi)
2790    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
2791  else if (Opc == ARM::MOVsr) {
2792    MIB.addReg(Reg2);
2793    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
2794  }
2795
2796  AddOptionalDefs(MIB);
2797  UpdateValueMap(I, ResultReg);
2798  return true;
2799}
2800
2801// TODO: SoftFP support.
2802bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
2803
2804  switch (I->getOpcode()) {
2805    case Instruction::Load:
2806      return SelectLoad(I);
2807    case Instruction::Store:
2808      return SelectStore(I);
2809    case Instruction::Br:
2810      return SelectBranch(I);
2811    case Instruction::IndirectBr:
2812      return SelectIndirectBr(I);
2813    case Instruction::ICmp:
2814    case Instruction::FCmp:
2815      return SelectCmp(I);
2816    case Instruction::FPExt:
2817      return SelectFPExt(I);
2818    case Instruction::FPTrunc:
2819      return SelectFPTrunc(I);
2820    case Instruction::SIToFP:
2821      return SelectIToFP(I, /*isSigned*/ true);
2822    case Instruction::UIToFP:
2823      return SelectIToFP(I, /*isSigned*/ false);
2824    case Instruction::FPToSI:
2825      return SelectFPToI(I, /*isSigned*/ true);
2826    case Instruction::FPToUI:
2827      return SelectFPToI(I, /*isSigned*/ false);
2828    case Instruction::Add:
2829      return SelectBinaryIntOp(I, ISD::ADD);
2830    case Instruction::Or:
2831      return SelectBinaryIntOp(I, ISD::OR);
2832    case Instruction::Sub:
2833      return SelectBinaryIntOp(I, ISD::SUB);
2834    case Instruction::FAdd:
2835      return SelectBinaryFPOp(I, ISD::FADD);
2836    case Instruction::FSub:
2837      return SelectBinaryFPOp(I, ISD::FSUB);
2838    case Instruction::FMul:
2839      return SelectBinaryFPOp(I, ISD::FMUL);
2840    case Instruction::SDiv:
2841      return SelectDiv(I, /*isSigned*/ true);
2842    case Instruction::UDiv:
2843      return SelectDiv(I, /*isSigned*/ false);
2844    case Instruction::SRem:
2845      return SelectRem(I, /*isSigned*/ true);
2846    case Instruction::URem:
2847      return SelectRem(I, /*isSigned*/ false);
2848    case Instruction::Call:
2849      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2850        return SelectIntrinsicCall(*II);
2851      return SelectCall(I);
2852    case Instruction::Select:
2853      return SelectSelect(I);
2854    case Instruction::Ret:
2855      return SelectRet(I);
2856    case Instruction::Trunc:
2857      return SelectTrunc(I);
2858    case Instruction::ZExt:
2859    case Instruction::SExt:
2860      return SelectIntExt(I);
2861    case Instruction::Shl:
2862      return SelectShift(I, ARM_AM::lsl);
2863    case Instruction::LShr:
2864      return SelectShift(I, ARM_AM::lsr);
2865    case Instruction::AShr:
2866      return SelectShift(I, ARM_AM::asr);
2867    default: break;
2868  }
2869  return false;
2870}
2871
namespace {
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;      // Immediate the extend must carry to be foldable.
  uint8_t isZExt     : 1;   // 1 => zero-extend, 0 => sign-extend.
  uint8_t ExpectedVT : 7;   // MVT::SimpleValueType of the loaded value
                            // (i8/i16 enum values fit in 7 bits).
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
};
}
2890
2891/// \brief The specified machine instr operand is a vreg, and that
2892/// vreg is being provided by the specified load instruction.  If possible,
2893/// try to fold the load as an operand to the instruction, returning true if
2894/// successful.
2895bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2896                                      const LoadInst *LI) {
2897  // Verify we have a legal type before going any further.
2898  MVT VT;
2899  if (!isLoadTypeLegal(LI->getType(), VT))
2900    return false;
2901
2902  // Combine load followed by zero- or sign-extend.
2903  // ldrb r1, [r0]       ldrb r1, [r0]
2904  // uxtb r2, r1     =>
2905  // mov  r3, r2         mov  r3, r1
2906  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
2907    return false;
2908  const uint64_t Imm = MI->getOperand(2).getImm();
2909
2910  bool Found = false;
2911  bool isZExt;
2912  for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
2913       i != e; ++i) {
2914    if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
2915        (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
2916        MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
2917      Found = true;
2918      isZExt = FoldableLoadExtends[i].isZExt;
2919    }
2920  }
2921  if (!Found) return false;
2922
2923  // See if we can handle this address.
2924  Address Addr;
2925  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
2926
2927  unsigned ResultReg = MI->getOperand(0).getReg();
2928  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
2929    return false;
2930  MI->eraseFromParent();
2931  return true;
2932}
2933
/// Materialize the address of global \p GV for PIC on ELF and return the
/// virtual register holding it.  A constant-pool entry (GOTOFF for local /
/// hidden globals, GOT otherwise) is loaded and then combined with the
/// function's global base register.
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                     unsigned Align, MVT VT) {
  // Local/hidden globals can be addressed as a direct offset (GOTOFF);
  // everything else goes through a GOT slot.
  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
  ARMConstantPoolConstant *CPV =
    ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

  unsigned Opc;
  unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
  // Load value.
  if (isThumb2) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg1)
                    .addConstantPoolIndex(Idx));
    Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
  } else {
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                            DL, TII.get(ARM::LDRcp), DestReg1)
                    .addConstantPoolIndex(Idx).addImm(0));
    Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
  }

  // Lazily create the global base register the first time it is needed.
  unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
  if (GlobalBaseReg == 0) {
    GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
    AFI->setGlobalBaseReg(GlobalBaseReg);
  }

  // Second step: GOTOFF adds the loaded offset to the base register (ADDrr);
  // the GOT path instead loads the final address out of the GOT (LDRrs/t2LDRs,
  // which take an extra zero immediate below).
  unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(Opc), DestReg2)
                            .addReg(DestReg1)
                            .addReg(GlobalBaseReg);
  if (!UseGOTOFF)
    MIB.addImm(0);
  AddOptionalDefs(MIB);

  return DestReg2;
}
2974
/// Lower the function's incoming formal arguments directly, bypassing the
/// generic argument lowering.  Only handles up to 4 plain i8/i16/i32
/// scalar arguments passed in r0-r3; returns false to fall back otherwise.
bool ARMFastISel::FastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  // Only the standard C-family ARM calling conventions are supported.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
    break;
  }

  // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
  unsigned Idx = 1;  // Attribute indices for arguments are 1-based.
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++Idx) {
    if (Idx > 4)
      return false;

    // Arguments needing special passing (inreg/sret/byval) are rejected.
    if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
      return false;

    Type *ArgTy = I->getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }


  static const uint16_t GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  // Second pass: all checks passed, so bind each argument to a copy of
  // its incoming physical register.
  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
  Idx = 0;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++Idx) {
    unsigned SrcReg = GPRArgRegs[Idx];
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    UpdateValueMap(I, ResultReg);
  }

  return true;
}
3046
3047namespace llvm {
3048  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
3049                                const TargetLibraryInfo *libInfo) {
3050    const TargetMachine &TM = funcInfo.MF->getTarget();
3051
3052    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
3053    // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
3054    bool UseFastISel = false;
3055    UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only();
3056    UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
3057    UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
3058
3059    if (UseFastISel) {
3060      // iOS always has a FP for backtracking, force other targets
3061      // to keep their FP when doing FastISel. The emitted code is
3062      // currently superior, and in cases like test-suite's lencod
3063      // FastISel isn't quite correct when FP is eliminated.
3064      TM.Options.NoFramePointerElim = true;
3065      return new ARMFastISel(funcInfo, libInfo);
3066    }
3067    return 0;
3068  }
3069}
3070